{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0059880239520957, "eval_steps": 500, "global_step": 670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "logps_train/policy_1_2": -230.5351104736328, "logps_train/policy_1_l": -196.75421142578125, "logps_train/policy_1_w": -192.50221252441406, "logps_train/policy_2_2": -204.13348388671875, "logps_train/policy_2_w": -216.84176635742188, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -0.04726085811853409, "rewards_train/1-l": -0.08792252838611603, "rewards_train/1-w": -0.04123714938759804, "rewards_train/2-2": -0.044598862528800964, "rewards_train/2-w": 0.0033226963132619858, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.04668537899851799, "rewards_train/margins_1": 0.00602370873093605, "rewards_train/margins_2": -0.04792155884206295, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -157.4934844970703, "logps_train/policy_1_l": -149.969970703125, "logps_train/policy_1_w": -152.83432006835938, "logps_train/policy_2_2": -146.33731079101562, "logps_train/policy_2_w": -168.31048583984375, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -0.04153573513031006, "rewards_train/1-l": -0.07092289626598358, "rewards_train/1-w": -0.013900126330554485, "rewards_train/2-2": -0.02435622364282608, "rewards_train/2-w": -0.02167436107993126, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.057022769935429096, "rewards_train/margins_1": 0.027635608799755573, "rewards_train/margins_2": -0.002681862562894821, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -173.85934448242188, "logps_train/policy_1_l": -138.67086791992188, "logps_train/policy_1_w": -143.44061279296875, "logps_train/policy_2_2": -154.49261474609375, "logps_train/policy_2_w": -168.56600952148438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.006252564489841461, "rewards_train/1-l": 0.019925180822610855, "rewards_train/1-w": 0.005937837064266205, "rewards_train/2-2": 0.00464552640914917, "rewards_train/2-w": 0.0012112017720937729, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.01398734375834465, "rewards_train/margins_1": -0.00031472742557525635, "rewards_train/margins_2": 0.003434324637055397, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -106.37958526611328, "logps_train/policy_1_l": -90.11468505859375, "logps_train/policy_1_w": -92.64643096923828, "logps_train/policy_2_2": -92.89311218261719, "logps_train/policy_2_w": -111.07006072998047, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.012042188085615635, "rewards_train/1-l": 0.004937386140227318, "rewards_train/1-w": -0.02948683127760887, "rewards_train/2-2": 0.05131369084119797, "rewards_train/2-w": 0.00354078970849514, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.03442421741783619, "rewards_train/margins_1": -0.041529019363224506, "rewards_train/margins_2": 0.04777290113270283, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -116.73878479003906, "logps_train/policy_1_l": -116.78949737548828, "logps_train/policy_1_w": -150.69976806640625, "logps_train/policy_2_2": -101.51828002929688, "logps_train/policy_2_w": -169.82022094726562, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.03091052733361721, "rewards_train/1-l": 0.02183149755001068, "rewards_train/1-w": -0.09185127913951874, "rewards_train/2-2": 0.00598437525331974, "rewards_train/2-w": -0.02811610884964466, "rewards_train/accuracies": 0.125, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.11368277668952942, "rewards_train/margins_1": -0.06094075180590153, "rewards_train/margins_2": 0.0341004841029644, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -185.17111206054688, "logps_train/policy_1_l": -184.36947631835938, "logps_train/policy_1_w": -152.7649383544922, "logps_train/policy_2_2": -169.13307189941406, "logps_train/policy_2_w": -170.01678466796875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.28117436170578003, "rewards_train/1-l": -0.060969989746809006, "rewards_train/1-w": -0.004618251696228981, "rewards_train/2-2": -0.23830722272396088, "rewards_train/2-w": 0.0002734959125518799, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.056351738050580025, "rewards_train/margins_1": 0.27655611000955105, "rewards_train/margins_2": -0.23858071863651276, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -139.53805541992188, "logps_train/policy_1_l": -144.78195190429688, "logps_train/policy_1_w": -184.93743896484375, "logps_train/policy_2_2": -120.95767211914062, "logps_train/policy_2_w": -202.85296630859375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.014943337067961693, "rewards_train/1-l": -0.020186379551887512, "rewards_train/1-w": 0.0257868729531765, "rewards_train/2-2": 0.02298278734087944, "rewards_train/2-w": -0.03998458385467529, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.04597325250506401, "rewards_train/margins_1": 0.010843535885214806, "rewards_train/margins_2": 0.06296737119555473, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -113.85527038574219, "logps_train/policy_1_l": -123.26825714111328, "logps_train/policy_1_w": -119.96627807617188, "logps_train/policy_2_2": -92.39627075195312, "logps_train/policy_2_w": -146.2642822265625, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.000410543754696846, "rewards_train/1-l": 0.01951197162270546, "rewards_train/1-w": -0.03022189810872078, "rewards_train/2-2": -0.009939629584550858, "rewards_train/2-w": -0.07017748057842255, "rewards_train/accuracies": 0.25, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.04973386973142624, "rewards_train/margins_1": -0.030632441863417625, "rewards_train/margins_2": 0.06023785099387169, "step": 0 }, { "epoch": 0.0, "logps_train/policy_1_2": -118.14009857177734, "logps_train/policy_1_l": -125.07862854003906, "logps_train/policy_1_w": -158.88441467285156, "logps_train/policy_2_2": -110.10863494873047, "logps_train/policy_2_w": -176.76702880859375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.0012249713763594627, "rewards_train/1-l": 0.015183579176664352, "rewards_train/1-w": -0.006410121917724609, "rewards_train/2-2": 0.0014416372869163752, "rewards_train/2-w": -0.052485156804323196, "rewards_train/accuracies": 0.25, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.021593701094388962, "rewards_train/margins_1": -0.007635093294084072, "rewards_train/margins_2": 0.05392679409123957, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -201.3710479736328, "logps_train/policy_1_l": -206.0267333984375, "logps_train/policy_1_w": -156.53201293945312, "logps_train/policy_2_2": -178.99803161621094, "logps_train/policy_2_w": -188.16973876953125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.07069939374923706, "rewards_train/1-l": -0.020543716847896576, "rewards_train/1-w": 0.06300993263721466, "rewards_train/2-2": -0.09218639135360718, "rewards_train/2-w": -0.000861200038343668, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.08355364948511124, "rewards_train/margins_1": 0.13370932638645172, "rewards_train/margins_2": -0.09132519131526351, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -134.76528930664062, "logps_train/policy_1_l": -93.81011962890625, "logps_train/policy_1_w": -113.64286804199219, "logps_train/policy_2_2": -114.41255950927734, "logps_train/policy_2_w": -132.7657012939453, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.0868794173002243, "rewards_train/1-l": 0.02777741476893425, "rewards_train/1-w": -0.0627242773771286, "rewards_train/2-2": -0.04418618232011795, "rewards_train/2-w": -0.05313291400671005, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.09050169214606285, "rewards_train/margins_1": 0.024155139923095703, "rewards_train/margins_2": 0.008946731686592102, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -156.0883026123047, "logps_train/policy_1_l": -148.03256225585938, "logps_train/policy_1_w": -96.1266860961914, "logps_train/policy_2_2": -136.44960021972656, "logps_train/policy_2_w": -111.0531005859375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": -0.12914210557937622, "rewards_train/1-l": -0.06448669731616974, "rewards_train/1-w": -0.011887443251907825, "rewards_train/2-2": -0.10433564335107803, "rewards_train/2-w": -0.0021852264180779457, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.05259925406426191, "rewards_train/margins_1": 0.1172546623274684, "rewards_train/margins_2": -0.10215041693300009, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -203.43338012695312, "logps_train/policy_1_l": -127.79170989990234, "logps_train/policy_1_w": -146.16766357421875, "logps_train/policy_2_2": -189.12059020996094, "logps_train/policy_2_w": -162.57513427734375, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.09333750605583191, "rewards_train/1-l": 0.035672351717948914, "rewards_train/1-w": -0.012859940528869629, "rewards_train/2-2": -0.04604348540306091, "rewards_train/2-w": 0.04092440381646156, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.04853229224681854, "rewards_train/margins_1": 0.08047756552696228, "rewards_train/margins_2": -0.08696788921952248, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -236.53834533691406, "logps_train/policy_1_l": -204.41468811035156, "logps_train/policy_1_w": -137.1268768310547, "logps_train/policy_2_2": -206.81283569335938, "logps_train/policy_2_w": -160.1265106201172, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.05227218568325043, "rewards_train/1-l": 0.0026716627180576324, "rewards_train/1-w": -0.013469171710312366, "rewards_train/2-2": -0.049251772463321686, "rewards_train/2-w": -0.05405684933066368, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.01614083442837, "rewards_train/margins_1": 0.03880301397293806, "rewards_train/margins_2": 0.004805076867341995, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -138.90724182128906, "logps_train/policy_1_l": -143.15899658203125, "logps_train/policy_1_w": -194.23318481445312, "logps_train/policy_2_2": -122.06503295898438, "logps_train/policy_2_w": -222.28749084472656, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.042286086827516556, "rewards_train/1-l": -0.03406412899494171, "rewards_train/1-w": -0.06550464779138565, "rewards_train/2-2": -0.0188077874481678, "rewards_train/2-w": 0.035313680768013, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.03144051879644394, "rewards_train/margins_1": -0.023218560963869095, "rewards_train/margins_2": -0.0541214682161808, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -157.19998168945312, "logps_train/policy_1_l": -133.61004638671875, "logps_train/policy_1_w": -141.10826110839844, "logps_train/policy_2_2": -143.16917419433594, "logps_train/policy_2_w": -154.51791381835938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.04054874926805496, "rewards_train/1-l": 0.009696991182863712, "rewards_train/1-w": 0.05831463262438774, "rewards_train/2-2": 0.030738279223442078, "rewards_train/2-w": 0.12438022345304489, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.04861764144152403, "rewards_train/margins_1": 0.01776588335633278, "rewards_train/margins_2": -0.09364194422960281, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.9411764705882356e-07, "loss": 2.1161, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -185.81484985351562, "logps_train/policy_1_l": -250.996337890625, "logps_train/policy_1_w": -169.2049560546875, "logps_train/policy_2_2": -164.0032501220703, "logps_train/policy_2_w": -194.84457397460938, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.01586015336215496, "rewards_train/1-l": -0.12932220101356506, "rewards_train/1-w": 0.007630585692822933, "rewards_train/2-2": -0.052668094635009766, "rewards_train/2-w": -0.02039489895105362, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.136952786706388, "rewards_train/margins_1": 0.023490739054977894, "rewards_train/margins_2": -0.032273195683956146, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -95.62840270996094, "logps_train/policy_1_l": -61.618896484375, "logps_train/policy_1_w": -103.88514709472656, "logps_train/policy_2_2": -82.30894470214844, "logps_train/policy_2_w": -114.80191040039062, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -61.25, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.05512797832489014, "rewards_train/1-l": -0.02165515162050724, "rewards_train/1-w": 0.022423017770051956, "rewards_train/2-2": 0.01363616157323122, "rewards_train/2-w": 0.0362151637673378, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.044078169390559196, "rewards_train/margins_1": -0.03270496055483818, "rewards_train/margins_2": -0.02257900219410658, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -239.86123657226562, "logps_train/policy_1_l": -156.3731689453125, "logps_train/policy_1_w": -222.72959899902344, "logps_train/policy_2_2": -219.45510864257812, "logps_train/policy_2_w": -253.75514221191406, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": -0.022844448685646057, "rewards_train/1-l": -0.032630205154418945, "rewards_train/1-w": 0.03329124301671982, "rewards_train/2-2": -0.022073792293667793, "rewards_train/2-w": -0.0005138441920280457, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.06592144817113876, "rewards_train/margins_1": 0.056135691702365875, "rewards_train/margins_2": -0.021559948101639748, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -169.946533203125, "logps_train/policy_1_l": -197.74473571777344, "logps_train/policy_1_w": -120.33091735839844, "logps_train/policy_2_2": -145.14390563964844, "logps_train/policy_2_w": -141.633056640625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.03190997615456581, "rewards_train/1-l": -0.07525498420000076, "rewards_train/1-w": -0.04598233848810196, "rewards_train/2-2": 0.009436512365937233, "rewards_train/2-w": -0.06721296906471252, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.029272645711898804, "rewards_train/margins_1": -0.07789231464266777, "rewards_train/margins_2": 0.07664948143064976, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -241.19381713867188, "logps_train/policy_1_l": -166.14479064941406, "logps_train/policy_1_w": -229.17156982421875, "logps_train/policy_2_2": -206.13693237304688, "logps_train/policy_2_w": -257.3565368652344, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": -0.08500662446022034, "rewards_train/1-l": -0.007936796173453331, "rewards_train/1-w": -0.049970339983701706, "rewards_train/2-2": -0.05431704595685005, "rewards_train/2-w": -0.09346609562635422, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.042033543810248375, "rewards_train/margins_1": 0.03503628447651863, "rewards_train/margins_2": 0.039149049669504166, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -182.19989013671875, "logps_train/policy_1_l": -204.97079467773438, "logps_train/policy_1_w": -158.6148223876953, "logps_train/policy_2_2": -158.28355407714844, "logps_train/policy_2_w": -188.33221435546875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.0004582880064845085, "rewards_train/1-l": 0.05135762691497803, "rewards_train/1-w": -0.056794583797454834, "rewards_train/2-2": 0.004846848547458649, "rewards_train/2-w": -0.05744057148694992, "rewards_train/accuracies": 0.25, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": -0.10815221071243286, "rewards_train/margins_1": -0.056336295790970325, "rewards_train/margins_2": 0.06228742003440857, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -202.21295166015625, "logps_train/policy_1_l": -170.80343627929688, "logps_train/policy_1_w": -165.183837890625, "logps_train/policy_2_2": -181.65426635742188, "logps_train/policy_2_w": -182.85060119628906, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 0.039641689509153366, "rewards_train/1-l": 0.007156671024858952, "rewards_train/1-w": 0.05231952294707298, "rewards_train/2-2": -0.025582756847143173, "rewards_train/2-w": 0.01650264486670494, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.04516285192221403, "rewards_train/margins_1": 0.012677833437919617, "rewards_train/margins_2": -0.042085401713848114, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -197.0331268310547, "logps_train/policy_1_l": -177.98989868164062, "logps_train/policy_1_w": -202.70962524414062, "logps_train/policy_2_2": -172.3651123046875, "logps_train/policy_2_w": -229.32424926757812, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 0.059187036007642746, "rewards_train/1-l": -0.06293058395385742, "rewards_train/1-w": 0.010288168676197529, "rewards_train/2-2": 0.01661243475973606, "rewards_train/2-w": 0.02070004865527153, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.07321875263005495, "rewards_train/margins_1": -0.04889886733144522, "rewards_train/margins_2": -0.004087613895535469, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -177.91514587402344, "logps_train/policy_1_l": -135.09402465820312, "logps_train/policy_1_w": -124.92752838134766, "logps_train/policy_2_2": -162.56935119628906, "logps_train/policy_2_w": -149.3452606201172, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.04776468500494957, "rewards_train/1-l": -0.03635461628437042, "rewards_train/1-w": 0.041816819459199905, "rewards_train/2-2": -0.02959148958325386, "rewards_train/2-w": 0.007661295123398304, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.07817143574357033, "rewards_train/margins_1": 0.08958150446414948, "rewards_train/margins_2": -0.037252784706652164, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -162.3770751953125, "logps_train/policy_1_l": -95.85835266113281, "logps_train/policy_1_w": -140.12449645996094, "logps_train/policy_2_2": -141.82846069335938, "logps_train/policy_2_w": -163.67477416992188, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.009583305567502975, "rewards_train/1-l": -0.002631913870573044, "rewards_train/1-w": -0.02651193179190159, "rewards_train/2-2": 0.028873350471258163, "rewards_train/2-w": 0.026271916925907135, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.023880017921328545, "rewards_train/margins_1": -0.016928626224398613, "rewards_train/margins_2": 0.0026014335453510284, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -185.12252807617188, "logps_train/policy_1_l": -115.35796356201172, "logps_train/policy_1_w": -129.16497802734375, "logps_train/policy_2_2": -162.40948486328125, "logps_train/policy_2_w": -158.61947631835938, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.001808883622288704, "rewards_train/1-l": 0.0166452806442976, "rewards_train/1-w": -0.04218088090419769, "rewards_train/2-2": -0.012042328715324402, "rewards_train/2-w": -0.04944808781147003, "rewards_train/accuracies": 0.25, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.05882616154849529, "rewards_train/margins_1": -0.0439897645264864, "rewards_train/margins_2": 0.03740575909614563, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -197.11253356933594, "logps_train/policy_1_l": -140.2729034423828, "logps_train/policy_1_w": -142.72543334960938, "logps_train/policy_2_2": -178.9091796875, "logps_train/policy_2_w": -160.44607543945312, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.003439829684793949, "rewards_train/1-l": -0.008539659902453423, "rewards_train/1-w": 0.016909414902329445, "rewards_train/2-2": 0.05595610290765762, "rewards_train/2-w": 0.016330789774656296, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.025449074804782867, "rewards_train/margins_1": 0.020349244587123394, "rewards_train/margins_2": 0.03962531313300133, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -84.16588592529297, "logps_train/policy_1_l": -100.3232421875, "logps_train/policy_1_w": -125.75065612792969, "logps_train/policy_2_2": -72.94132232666016, "logps_train/policy_2_w": -142.69520568847656, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.022057652473449707, "rewards_train/1-l": 0.09502007067203522, "rewards_train/1-w": -0.07740853726863861, "rewards_train/2-2": 0.012899257242679596, "rewards_train/2-w": -0.10155297815799713, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.17242860794067383, "rewards_train/margins_1": -0.055350884795188904, "rewards_train/margins_2": 0.11445223540067673, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -115.63777923583984, "logps_train/policy_1_l": -113.31259155273438, "logps_train/policy_1_w": -107.66036224365234, "logps_train/policy_2_2": -95.98211669921875, "logps_train/policy_2_w": -129.534423828125, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.0018466007895767689, "rewards_train/1-l": -0.008602714166045189, "rewards_train/1-w": 0.011502855457365513, "rewards_train/2-2": 0.012335442937910557, "rewards_train/2-w": 0.007495557889342308, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.020105569623410702, "rewards_train/margins_1": 0.009656254667788744, "rewards_train/margins_2": 0.004839885048568249, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -215.37258911132812, "logps_train/policy_1_l": -189.89080810546875, "logps_train/policy_1_w": -186.45480346679688, "logps_train/policy_2_2": -179.80441284179688, "logps_train/policy_2_w": -223.82504272460938, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.14741580188274384, "rewards_train/1-l": -0.08029285073280334, "rewards_train/1-w": -0.10758907347917557, "rewards_train/2-2": -0.02965966984629631, "rewards_train/2-w": -0.12684014439582825, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": -0.027296222746372223, "rewards_train/margins_1": 0.03982672840356827, "rewards_train/margins_2": 0.09718047454953194, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -174.90444946289062, "logps_train/policy_1_l": -117.37577819824219, "logps_train/policy_1_w": -143.25180053710938, "logps_train/policy_2_2": -145.90277099609375, "logps_train/policy_2_w": -176.13998413085938, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.0420084223151207, "rewards_train/1-l": -0.016483891755342484, "rewards_train/1-w": -0.022055866196751595, "rewards_train/2-2": -0.008246398530900478, "rewards_train/2-w": -0.04524803161621094, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": -0.005571974441409111, "rewards_train/margins_1": 0.019952556118369102, "rewards_train/margins_2": 0.03700163308531046, "step": 3 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "loss": 2.0942, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -259.03387451171875, "logps_train/policy_1_l": -272.68048095703125, "logps_train/policy_1_w": -206.74822998046875, "logps_train/policy_2_2": -226.52761840820312, "logps_train/policy_2_w": -249.71923828125, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": -0.03126702457666397, "rewards_train/1-l": -0.10515855252742767, "rewards_train/1-w": 0.04314546659588814, "rewards_train/2-2": -0.03401058167219162, "rewards_train/2-w": 0.03354417160153389, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.1483040191233158, "rewards_train/margins_1": 0.07441249117255211, "rewards_train/margins_2": -0.06755475327372551, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -173.88670349121094, "logps_train/policy_1_l": -103.00178527832031, "logps_train/policy_1_w": -160.93746948242188, "logps_train/policy_2_2": -148.41578674316406, "logps_train/policy_2_w": -199.48728942871094, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.04863175004720688, "rewards_train/1-l": 0.03341559320688248, "rewards_train/1-w": -0.05624747276306152, "rewards_train/2-2": 0.0021716682240366936, "rewards_train/2-w": -0.10810370743274689, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": -0.089663065969944, "rewards_train/margins_1": -0.007615722715854645, "rewards_train/margins_2": 0.11027537565678358, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -149.99295043945312, "logps_train/policy_1_l": -109.82830810546875, "logps_train/policy_1_w": -113.0655517578125, "logps_train/policy_2_2": -129.95654296875, "logps_train/policy_2_w": -137.0756378173828, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.03210711479187012, "rewards_train/1-l": -0.11447139829397202, "rewards_train/1-w": 0.01610173098742962, "rewards_train/2-2": -0.04917006567120552, "rewards_train/2-w": -0.11693966388702393, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.13057312928140163, "rewards_train/margins_1": 0.048208845779299736, "rewards_train/margins_2": 0.0677695982158184, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -169.9332275390625, "logps_train/policy_1_l": -183.35333251953125, "logps_train/policy_1_w": -170.93099975585938, "logps_train/policy_2_2": -146.92620849609375, "logps_train/policy_2_w": -191.27552795410156, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.07769849896430969, "rewards_train/1-l": -0.04548913240432739, "rewards_train/1-w": 0.025649311020970345, "rewards_train/2-2": -0.10551092028617859, "rewards_train/2-w": -0.018176913261413574, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 0.07113844342529774, "rewards_train/margins_1": 0.10334780998528004, "rewards_train/margins_2": -0.08733400702476501, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -139.9715576171875, "logps_train/policy_1_l": -123.88804626464844, "logps_train/policy_1_w": -91.99031066894531, "logps_train/policy_2_2": -115.0951919555664, "logps_train/policy_2_w": -122.83380126953125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.0036251135170459747, "rewards_train/1-l": 0.04244527593255043, "rewards_train/1-w": -0.027155950665473938, "rewards_train/2-2": -0.03725350275635719, "rewards_train/2-w": -0.006818009540438652, "rewards_train/accuracies": 0.25, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": -0.06960122659802437, "rewards_train/margins_1": -0.030781064182519913, "rewards_train/margins_2": -0.03043549321591854, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -129.71006774902344, "logps_train/policy_1_l": -118.5057601928711, "logps_train/policy_1_w": -169.2382354736328, "logps_train/policy_2_2": -110.33747863769531, "logps_train/policy_2_w": -192.11550903320312, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.15318450331687927, "rewards_train/1-l": -0.24413076043128967, "rewards_train/1-w": -0.008199140429496765, "rewards_train/2-2": -0.07056422531604767, "rewards_train/2-w": -0.000613020732998848, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.2359316200017929, "rewards_train/margins_1": 0.1449853628873825, "rewards_train/margins_2": -0.06995120458304882, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -160.78546142578125, "logps_train/policy_1_l": -131.96017456054688, "logps_train/policy_1_w": -108.87191772460938, "logps_train/policy_2_2": -147.82350158691406, "logps_train/policy_2_w": -125.989990234375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -0.0254218727350235, "rewards_train/1-l": 0.029910754412412643, "rewards_train/1-w": -0.11297351121902466, "rewards_train/2-2": -0.010474681854248047, "rewards_train/2-w": -0.030248844996094704, "rewards_train/accuracies": 0.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": -0.1428842656314373, "rewards_train/margins_1": -0.08755163848400116, "rewards_train/margins_2": 0.019774163141846657, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -164.58692932128906, "logps_train/policy_1_l": -72.82464599609375, "logps_train/policy_1_w": -120.21746063232422, "logps_train/policy_2_2": -142.83587646484375, "logps_train/policy_2_w": -146.50216674804688, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.006151176989078522, "rewards_train/1-l": -0.014886701479554176, "rewards_train/1-w": -0.004168342798948288, "rewards_train/2-2": 0.029693927615880966, "rewards_train/2-w": 0.01228256244212389, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.010718358680605888, "rewards_train/margins_1": -0.01031951978802681, "rewards_train/margins_2": 0.017411365173757076, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -101.14025115966797, "logps_train/policy_1_l": -69.96429443359375, "logps_train/policy_1_w": -111.87136840820312, "logps_train/policy_2_2": -90.38426208496094, "logps_train/policy_2_w": -124.75730895996094, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.005116086453199387, "rewards_train/1-l": -0.05101947486400604, "rewards_train/1-w": 0.010909955948591232, "rewards_train/2-2": 0.00337015837430954, "rewards_train/2-w": -0.009324874728918076, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.061929430812597275, "rewards_train/margins_1": 0.005793869495391846, "rewards_train/margins_2": 0.012695033103227615, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -131.64385986328125, "logps_train/policy_1_l": -143.26217651367188, "logps_train/policy_1_w": -192.44635009765625, "logps_train/policy_2_2": -117.63047790527344, "logps_train/policy_2_w": -215.402099609375, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.006572170183062553, "rewards_train/1-l": 0.015188803896307945, "rewards_train/1-w": -0.025299303233623505, "rewards_train/2-2": 0.028358759358525276, "rewards_train/2-w": -0.025366349145770073, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": -0.04048810712993145, "rewards_train/margins_1": -0.01872713305056095, "rewards_train/margins_2": 0.05372510850429535, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -149.1110382080078, "logps_train/policy_1_l": -86.40382385253906, "logps_train/policy_1_w": -105.13836669921875, "logps_train/policy_2_2": -122.91046142578125, "logps_train/policy_2_w": -137.0255889892578, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.10016608983278275, "rewards_train/1-l": -0.09155500680208206, "rewards_train/1-w": -0.04196152836084366, "rewards_train/2-2": -0.04964008182287216, "rewards_train/2-w": -0.04005880653858185, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.0495934784412384, "rewards_train/margins_1": 0.05820456147193909, "rewards_train/margins_2": -0.009581275284290314, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -303.60076904296875, "logps_train/policy_1_l": -267.0076904296875, "logps_train/policy_1_w": -195.49673461914062, "logps_train/policy_2_2": -273.21429443359375, "logps_train/policy_2_w": -239.5916748046875, "logps_train/ref_1_2": -304.0, "logps_train/ref_1_l": -268.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -272.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 0.027421949431300163, "rewards_train/1-l": 0.017983436584472656, "rewards_train/1-w": 0.03157568350434303, "rewards_train/2-2": -0.00580420671030879, "rewards_train/2-w": 0.028333090245723724, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.013592246919870377, "rewards_train/margins_1": 0.00415373407304287, "rewards_train/margins_2": -0.034137296956032515, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -218.4010009765625, "logps_train/policy_1_l": -161.08787536621094, "logps_train/policy_1_w": -199.71829223632812, "logps_train/policy_2_2": -187.9735870361328, "logps_train/policy_2_w": -227.6013946533203, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -0.0291626937687397, "rewards_train/1-l": 0.002149295061826706, "rewards_train/1-w": 0.06567082554101944, "rewards_train/2-2": -0.012983130291104317, "rewards_train/2-w": 0.05079846456646919, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 0.06352153047919273, "rewards_train/margins_1": 0.09483351930975914, "rewards_train/margins_2": -0.06378159485757351, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -206.94219970703125, "logps_train/policy_1_l": -201.0708770751953, "logps_train/policy_1_w": -206.0855712890625, "logps_train/policy_2_2": -191.0215606689453, "logps_train/policy_2_w": -223.60150146484375, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.10265575349330902, "rewards_train/1-l": 0.003849498927593231, "rewards_train/1-w": 0.07894197106361389, "rewards_train/2-2": 0.035343218594789505, "rewards_train/2-w": 0.0773492306470871, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.07509247213602066, "rewards_train/margins_1": -0.02371378242969513, "rewards_train/margins_2": -0.04200601205229759, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -86.36271667480469, "logps_train/policy_1_l": -88.9365005493164, "logps_train/policy_1_w": -89.47107696533203, "logps_train/policy_2_2": -81.04376983642578, "logps_train/policy_2_w": -95.32025146484375, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": -0.03392775356769562, "rewards_train/1-l": -0.005466148257255554, "rewards_train/1-w": 0.017735958099365234, "rewards_train/2-2": -0.03308829292654991, "rewards_train/2-w": -0.03671325743198395, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.02320210635662079, "rewards_train/margins_1": 0.05166371166706085, "rewards_train/margins_2": 0.0036249645054340363, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -137.7926483154297, "logps_train/policy_1_l": -134.48590087890625, "logps_train/policy_1_w": -155.9376220703125, "logps_train/policy_2_2": -123.81320190429688, "logps_train/policy_2_w": -182.71127319335938, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.03160853311419487, "rewards_train/1-l": -0.010211003944277763, "rewards_train/1-w": -0.010949753224849701, "rewards_train/2-2": -0.056320540606975555, "rewards_train/2-w": -0.0008142460137605667, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.0007387492805719376, "rewards_train/margins_1": 0.02065877988934517, "rewards_train/margins_2": -0.05550629459321499, "step": 5 }, { "epoch": 0.02, "learning_rate": 8.823529411764707e-07, "loss": 2.085, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -156.09567260742188, "logps_train/policy_1_l": -96.95157623291016, "logps_train/policy_1_w": -122.70411682128906, "logps_train/policy_2_2": -138.67868041992188, "logps_train/policy_2_w": -140.9466552734375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.10566098988056183, "rewards_train/1-l": -0.12289164960384369, "rewards_train/1-w": -0.029786359518766403, "rewards_train/2-2": -0.04911777749657631, "rewards_train/2-w": -0.042320504784584045, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.09310529008507729, "rewards_train/margins_1": 0.07587463036179543, "rewards_train/margins_2": -0.006797272711992264, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -121.68397521972656, "logps_train/policy_1_l": -91.82713317871094, "logps_train/policy_1_w": -104.1983642578125, "logps_train/policy_2_2": -105.15006256103516, "logps_train/policy_2_w": -117.56871032714844, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": -0.10433535277843475, "rewards_train/1-l": -0.021872924640774727, "rewards_train/1-w": 0.03953893482685089, "rewards_train/2-2": -0.10641268640756607, "rewards_train/2-w": 0.05172271654009819, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.06141185946762562, "rewards_train/margins_1": 0.14387428760528564, "rewards_train/margins_2": -0.15813540294766426, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -186.05496215820312, "logps_train/policy_1_l": -102.46924591064453, "logps_train/policy_1_w": -101.71475219726562, "logps_train/policy_2_2": -160.7196807861328, "logps_train/policy_2_w": -126.86526489257812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 0.03630024194717407, "rewards_train/1-l": -0.04145608842372894, "rewards_train/1-w": 0.03711896762251854, "rewards_train/2-2": 0.09404758363962173, "rewards_train/2-w": 0.06630624830722809, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.07857505604624748, "rewards_train/margins_1": 0.0008187256753444672, "rewards_train/margins_2": 0.027741335332393646, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -155.33494567871094, "logps_train/policy_1_l": -80.56683349609375, "logps_train/policy_1_w": -86.7103271484375, "logps_train/policy_2_2": -131.41964721679688, "logps_train/policy_2_w": -105.48148345947266, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 0.1274425983428955, "rewards_train/1-l": -0.019378116354346275, "rewards_train/1-w": 0.014123870059847832, "rewards_train/2-2": 0.04866025596857071, "rewards_train/2-w": -0.002055002376437187, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.03350198641419411, "rewards_train/margins_1": -0.11331872828304768, "rewards_train/margins_2": 0.050715258345007896, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -141.77761840820312, "logps_train/policy_1_l": -93.56175994873047, "logps_train/policy_1_w": -93.67637634277344, "logps_train/policy_2_2": -128.57086181640625, "logps_train/policy_2_w": -105.37735748291016, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 0.012083245441317558, "rewards_train/1-l": -0.00871507078409195, "rewards_train/1-w": -0.00045031309127807617, "rewards_train/2-2": 0.002485077828168869, "rewards_train/2-w": 0.0028890848625451326, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.008264757692813873, "rewards_train/margins_1": -0.012533558532595634, "rewards_train/margins_2": -0.0004040070343762636, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -145.09527587890625, "logps_train/policy_1_l": -131.00448608398438, "logps_train/policy_1_w": -127.84352111816406, "logps_train/policy_2_2": -123.1950454711914, "logps_train/policy_2_w": -160.59991455078125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.04281587898731232, "rewards_train/1-l": -0.023689914494752884, "rewards_train/1-w": -0.03903939947485924, "rewards_train/2-2": 0.017214393243193626, "rewards_train/2-w": 0.02672790363430977, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.015349484980106354, "rewards_train/margins_1": -0.08185527846217155, "rewards_train/margins_2": -0.009513510391116142, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -245.72430419921875, "logps_train/policy_1_l": -289.37548828125, "logps_train/policy_1_w": -310.9945068359375, "logps_train/policy_2_2": -213.88180541992188, "logps_train/policy_2_w": -360.5625, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -288.0, "logps_train/ref_1_w": -312.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -360.0, "rewards_train/1-2": 0.0588194839656353, "rewards_train/1-l": -0.01255025900900364, "rewards_train/1-w": 0.0849265605211258, "rewards_train/2-2": 0.08369503915309906, "rewards_train/2-w": 0.006247896701097488, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.09747681953012943, "rewards_train/margins_1": 0.026107076555490494, "rewards_train/margins_2": 0.07744714245200157, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -147.38653564453125, "logps_train/policy_1_l": -100.43635559082031, "logps_train/policy_1_w": -121.6854476928711, "logps_train/policy_2_2": -120.11712646484375, "logps_train/policy_2_w": -138.06118774414062, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.011542062275111675, "rewards_train/1-l": 0.014762625098228455, "rewards_train/1-w": 0.03301805257797241, "rewards_train/2-2": 0.011919375509023666, "rewards_train/2-w": 0.013412559404969215, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.018255427479743958, "rewards_train/margins_1": 0.021475990302860737, "rewards_train/margins_2": -0.001493183895945549, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -130.02215576171875, "logps_train/policy_1_l": -115.16741943359375, "logps_train/policy_1_w": -100.31564331054688, "logps_train/policy_2_2": -105.7411117553711, "logps_train/policy_2_w": -117.4980239868164, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 0.0501277819275856, "rewards_train/1-l": 0.001958874985575676, "rewards_train/1-w": 0.020974798128008842, "rewards_train/2-2": 0.016513977199792862, "rewards_train/2-w": -0.01698967255651951, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.019015923142433167, "rewards_train/margins_1": -0.02915298379957676, "rewards_train/margins_2": 0.03350364975631237, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -181.35203552246094, "logps_train/policy_1_l": -136.8556365966797, "logps_train/policy_1_w": -183.04808044433594, "logps_train/policy_2_2": -158.517333984375, "logps_train/policy_2_w": -208.28009033203125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.027297448366880417, "rewards_train/1-l": -0.08009488880634308, "rewards_train/1-w": 0.1061297357082367, "rewards_train/2-2": 0.037329114973545074, "rewards_train/2-w": 0.043865494430065155, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.18622462451457977, "rewards_train/margins_1": 0.07883228734135628, "rewards_train/margins_2": -0.006536379456520081, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -210.01742553710938, "logps_train/policy_1_l": -169.97605895996094, "logps_train/policy_1_w": -204.3693084716797, "logps_train/policy_2_2": -178.18453979492188, "logps_train/policy_2_w": -236.18675231933594, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 0.017007922753691673, "rewards_train/1-l": 0.052394699305295944, "rewards_train/1-w": 0.06971021741628647, "rewards_train/2-2": 0.05498366057872772, "rewards_train/2-w": 0.005153270438313484, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.017315518110990524, "rewards_train/margins_1": 0.052702294662594795, "rewards_train/margins_2": 0.04983039014041424, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -150.04861450195312, "logps_train/policy_1_l": -96.79496765136719, "logps_train/policy_1_w": -166.6898193359375, "logps_train/policy_2_2": -129.57423400878906, "logps_train/policy_2_w": -190.08340454101562, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 0.04904499650001526, "rewards_train/1-l": 0.014057755470275879, "rewards_train/1-w": -0.004918457940220833, "rewards_train/2-2": 0.11952931433916092, "rewards_train/2-w": 0.07603547722101212, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.01897621341049671, "rewards_train/margins_1": -0.05396345444023609, "rewards_train/margins_2": 0.043493837118148804, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -224.220703125, "logps_train/policy_1_l": -124.31123352050781, "logps_train/policy_1_w": -157.41259765625, "logps_train/policy_2_2": -197.33399963378906, "logps_train/policy_2_w": -181.38345336914062, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.09160144627094269, "rewards_train/1-l": -0.047138459980487823, "rewards_train/1-w": 0.0001464635133743286, "rewards_train/2-2": 0.06894482672214508, "rewards_train/2-w": 0.04915422573685646, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.04728492349386215, "rewards_train/margins_1": -0.09145498275756836, "rewards_train/margins_2": 0.01979060098528862, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -157.9593505859375, "logps_train/policy_1_l": -106.98171997070312, "logps_train/policy_1_w": -106.49229431152344, "logps_train/policy_2_2": -134.26449584960938, "logps_train/policy_2_w": -128.31292724609375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.013439369387924671, "rewards_train/1-l": -0.005691659636795521, "rewards_train/1-w": 0.03192310780286789, "rewards_train/2-2": 0.007924984209239483, "rewards_train/2-w": 0.005426911637187004, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.03761476743966341, "rewards_train/margins_1": 0.018483738414943218, "rewards_train/margins_2": 0.002498072572052479, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -201.6685333251953, "logps_train/policy_1_l": -174.6761932373047, "logps_train/policy_1_w": -179.43682861328125, "logps_train/policy_2_2": -175.3757781982422, "logps_train/policy_2_w": -221.8795166015625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.0538504384458065, "rewards_train/1-l": -0.057853300124406815, "rewards_train/1-w": -0.037432000041007996, "rewards_train/2-2": 0.03703121095895767, "rewards_train/2-w": 0.0026745349168777466, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.02042130008339882, "rewards_train/margins_1": -0.0912824384868145, "rewards_train/margins_2": 0.034356676042079926, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -92.86325073242188, "logps_train/policy_1_l": -108.94886779785156, "logps_train/policy_1_w": -115.24732971191406, "logps_train/policy_2_2": -75.3769302368164, "logps_train/policy_2_w": -127.00762939453125, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.011330550536513329, "rewards_train/1-l": 0.013902818784117699, "rewards_train/1-w": 0.00026693567633628845, "rewards_train/2-2": -0.03476302698254585, "rewards_train/2-w": -0.007793828845024109, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": -0.01363588310778141, "rewards_train/margins_1": -0.01106361486017704, "rewards_train/margins_2": -0.026969198137521744, "step": 7 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "loss": 2.0451, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -123.93205261230469, "logps_train/policy_1_l": -133.8191680908203, "logps_train/policy_1_w": -185.49810791015625, "logps_train/policy_2_2": -104.96076202392578, "logps_train/policy_2_w": -212.29376220703125, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.013518547639250755, "rewards_train/1-l": -0.011604047380387783, "rewards_train/1-w": 0.11581446975469589, "rewards_train/2-2": 0.01661876030266285, "rewards_train/2-w": 0.00968465767800808, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.12741851713508368, "rewards_train/margins_1": 0.12933301739394665, "rewards_train/margins_2": 0.00693410262465477, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -194.80264282226562, "logps_train/policy_1_l": -132.3957977294922, "logps_train/policy_1_w": -118.82318115234375, "logps_train/policy_2_2": -179.42562866210938, "logps_train/policy_2_w": -136.58050537109375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.07129807770252228, "rewards_train/1-l": 0.04235439747571945, "rewards_train/1-w": 0.09424470365047455, "rewards_train/2-2": 0.10353124886751175, "rewards_train/2-w": 0.1278873085975647, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.051890306174755096, "rewards_train/margins_1": 0.02294662594795227, "rewards_train/margins_2": -0.024356059730052948, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -176.30767822265625, "logps_train/policy_1_l": -147.88742065429688, "logps_train/policy_1_w": -165.24484252929688, "logps_train/policy_2_2": -159.72390747070312, "logps_train/policy_2_w": -181.6165771484375, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.04110674560070038, "rewards_train/1-l": 0.015944574028253555, "rewards_train/1-w": 0.11457730084657669, "rewards_train/2-2": 0.038546327501535416, "rewards_train/2-w": 0.07271786034107208, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.09863272681832314, "rewards_train/margins_1": 0.07347055524587631, "rewards_train/margins_2": -0.03417153283953667, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -256.18487548828125, "logps_train/policy_1_l": -164.93601989746094, "logps_train/policy_1_w": -173.58934020996094, "logps_train/policy_2_2": -235.37152099609375, "logps_train/policy_2_w": -199.57568359375, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.1455751657485962, "rewards_train/1-l": 0.009522940032184124, "rewards_train/1-w": 0.12973731756210327, "rewards_train/2-2": 0.19800357520580292, "rewards_train/2-w": 0.050245001912117004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.12021437752991915, "rewards_train/margins_1": -0.01583784818649292, "rewards_train/margins_2": 0.1477585732936859, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -151.38580322265625, "logps_train/policy_1_l": -150.1470489501953, "logps_train/policy_1_w": -193.1411590576172, "logps_train/policy_2_2": -133.93753051757812, "logps_train/policy_2_w": -224.19784545898438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.10907617211341858, "rewards_train/1-l": -0.02924344129860401, "rewards_train/1-w": 0.05697772651910782, "rewards_train/2-2": 0.10820029675960541, "rewards_train/2-w": 0.027089763432741165, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.08622116781771183, "rewards_train/margins_1": -0.05209844559431076, "rewards_train/margins_2": 0.08111053332686424, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -121.1734619140625, "logps_train/policy_1_l": -136.11285400390625, "logps_train/policy_1_w": -88.89256286621094, "logps_train/policy_2_2": -113.5011215209961, "logps_train/policy_2_w": -99.7983169555664, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 0.042027853429317474, "rewards_train/1-l": -0.01948852464556694, "rewards_train/1-w": 0.05214983969926834, "rewards_train/2-2": 0.04598123952746391, "rewards_train/2-w": -0.023972410708665848, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.07163836434483528, "rewards_train/margins_1": 0.010121986269950867, "rewards_train/margins_2": 0.06995365023612976, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -138.56430053710938, "logps_train/policy_1_l": -201.5063018798828, "logps_train/policy_1_w": -171.89759826660156, "logps_train/policy_2_2": -122.16666412353516, "logps_train/policy_2_w": -189.80654907226562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.017007648944854736, "rewards_train/1-l": -0.05258333310484886, "rewards_train/1-w": 0.09935199469327927, "rewards_train/2-2": -0.022330375388264656, "rewards_train/2-w": 0.06543896347284317, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.15193532779812813, "rewards_train/margins_1": 0.08234434574842453, "rewards_train/margins_2": -0.08776933886110783, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -213.62686157226562, "logps_train/policy_1_l": -145.8352813720703, "logps_train/policy_1_w": -222.47451782226562, "logps_train/policy_2_2": -193.42140197753906, "logps_train/policy_2_w": -239.92393493652344, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 0.11719774454832077, "rewards_train/1-l": -0.01770836114883423, "rewards_train/1-w": 0.11661073565483093, "rewards_train/2-2": 0.11977413296699524, "rewards_train/2-w": 0.11698167771100998, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.13431909680366516, "rewards_train/margins_1": -0.0005870088934898376, "rewards_train/margins_2": 0.00279245525598526, "step": 8 }, { "epoch": 0.03, "logps_train/policy_1_2": -111.96533203125, "logps_train/policy_1_l": -109.13184356689453, "logps_train/policy_1_w": -134.49188232421875, "logps_train/policy_2_2": -99.68173217773438, "logps_train/policy_2_w": -159.08377075195312, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.10854454338550568, "rewards_train/1-l": -0.04170013591647148, "rewards_train/1-w": 0.014875512570142746, "rewards_train/2-2": 0.06893616169691086, "rewards_train/2-w": -0.005642687901854515, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.05657564848661423, "rewards_train/margins_1": -0.09366903081536293, "rewards_train/margins_2": 0.07457884959876537, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -157.22366333007812, "logps_train/policy_1_l": -186.9852294921875, "logps_train/policy_1_w": -169.33895874023438, "logps_train/policy_2_2": -129.81549072265625, "logps_train/policy_2_w": -198.68923950195312, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 0.04950904846191406, "rewards_train/1-l": 0.01944643072783947, "rewards_train/1-w": 0.13563600182533264, "rewards_train/2-2": 0.03720112144947052, "rewards_train/2-w": 0.04826347902417183, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.11618957109749317, "rewards_train/margins_1": 0.08612695336341858, "rewards_train/margins_2": -0.01106235757470131, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -126.93930053710938, "logps_train/policy_1_l": -107.64044189453125, "logps_train/policy_1_w": -117.96257019042969, "logps_train/policy_2_2": -107.1368408203125, "logps_train/policy_2_w": -142.75473022460938, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.07013289630413055, "rewards_train/1-l": 0.0230645053088665, "rewards_train/1-w": 0.16155503690242767, "rewards_train/2-2": 0.09725312888622284, "rewards_train/2-w": 0.025308873504400253, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.13849053159356117, "rewards_train/margins_1": 0.09142214059829712, "rewards_train/margins_2": 0.07194425538182259, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -148.8968048095703, "logps_train/policy_1_l": -130.2362823486328, "logps_train/policy_1_w": -143.1337127685547, "logps_train/policy_2_2": -128.56124877929688, "logps_train/policy_2_w": -156.37051391601562, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.0759439468383789, "rewards_train/1-l": -0.0033160513266921043, "rewards_train/1-w": 0.15147259831428528, "rewards_train/2-2": 0.11575010418891907, "rewards_train/2-w": 0.12779323756694794, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.15478864964097738, "rewards_train/margins_1": 0.07552865147590637, "rewards_train/margins_2": -0.01204313337802887, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -85.20123291015625, "logps_train/policy_1_l": -81.97303009033203, "logps_train/policy_1_w": -71.98957824707031, "logps_train/policy_2_2": -74.25619506835938, "logps_train/policy_2_w": -78.97632598876953, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 0.0775328129529953, "rewards_train/1-l": -0.011560766957700253, "rewards_train/1-w": 0.002800446003675461, "rewards_train/2-2": 0.02828662469983101, "rewards_train/2-w": 0.010961366817355156, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.014361212961375713, "rewards_train/margins_1": -0.07473236694931984, "rewards_train/margins_2": 0.017325257882475853, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -204.39801025390625, "logps_train/policy_1_l": -210.473388671875, "logps_train/policy_1_w": -218.2723388671875, "logps_train/policy_2_2": -181.22633361816406, "logps_train/policy_2_w": -245.1586456298828, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 0.0476994514465332, "rewards_train/1-l": 0.06281626224517822, "rewards_train/1-w": 0.17901581525802612, "rewards_train/2-2": 0.1070537120103836, "rewards_train/2-w": 0.165385439991951, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.1161995530128479, "rewards_train/margins_1": 0.13131636381149292, "rewards_train/margins_2": -0.05833172798156738, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -113.44132995605469, "logps_train/policy_1_l": -111.98918151855469, "logps_train/policy_1_w": -162.37506103515625, "logps_train/policy_2_2": -94.12367248535156, "logps_train/policy_2_w": -185.90972900390625, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.050789378583431244, "rewards_train/1-l": -0.07469901442527771, "rewards_train/1-w": 0.07186945527791977, "rewards_train/2-2": 0.026890000328421593, "rewards_train/2-w": 0.17621469497680664, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.14656846970319748, "rewards_train/margins_1": 0.021080076694488525, "rewards_train/margins_2": -0.14932469464838505, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -200.25112915039062, "logps_train/policy_1_l": -182.33877563476562, "logps_train/policy_1_w": -263.7061767578125, "logps_train/policy_2_2": -180.1322021484375, "logps_train/policy_2_w": -300.26739501953125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -266.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -300.0, "rewards_train/1-2": 0.15613700449466705, "rewards_train/1-l": -0.07762699574232101, "rewards_train/1-w": 0.18875734508037567, "rewards_train/2-2": 0.12427892535924911, "rewards_train/2-w": 0.04825868457555771, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.2663843408226967, "rewards_train/margins_1": 0.03262034058570862, "rewards_train/margins_2": 0.0760202407836914, "step": 9 }, { "epoch": 0.03, "learning_rate": 1.4705882352941177e-06, "loss": 1.9499, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -187.95765686035156, "logps_train/policy_1_l": -177.44985961914062, "logps_train/policy_1_w": -216.6949005126953, "logps_train/policy_2_2": -167.45004272460938, "logps_train/policy_2_w": -241.17654418945312, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 0.14173394441604614, "rewards_train/1-l": -0.04498548433184624, "rewards_train/1-w": 0.13051119446754456, "rewards_train/2-2": 0.1549951583147049, "rewards_train/2-w": 0.07609710097312927, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.1754966787993908, "rewards_train/margins_1": -0.011222749948501587, "rewards_train/margins_2": 0.07889805734157562, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -188.45884704589844, "logps_train/policy_1_l": -117.44640350341797, "logps_train/policy_1_w": -159.21755981445312, "logps_train/policy_2_2": -168.39761352539062, "logps_train/policy_2_w": -177.84042358398438, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.17208464443683624, "rewards_train/1-l": 0.06883608549833298, "rewards_train/1-w": 0.18762007355690002, "rewards_train/2-2": 0.15594306588172913, "rewards_train/2-w": 0.136269211769104, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.11878398805856705, "rewards_train/margins_1": 0.015535429120063782, "rewards_train/margins_2": 0.019673854112625122, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -132.4787139892578, "logps_train/policy_1_l": -117.10811614990234, "logps_train/policy_1_w": -116.37138366699219, "logps_train/policy_2_2": -107.05122375488281, "logps_train/policy_2_w": -148.37625122070312, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.06384717673063278, "rewards_train/1-l": 0.07610175013542175, "rewards_train/1-w": 0.19337916374206543, "rewards_train/2-2": 0.03940925747156143, "rewards_train/2-w": 0.11862504482269287, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.11727741360664368, "rewards_train/margins_1": 0.12953198701143265, "rewards_train/margins_2": -0.07921578735113144, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -193.18267822265625, "logps_train/policy_1_l": -179.98297119140625, "logps_train/policy_1_w": -128.50006103515625, "logps_train/policy_2_2": -172.6953582763672, "logps_train/policy_2_w": -144.86416625976562, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.18798132240772247, "rewards_train/1-l": 0.005218906328082085, "rewards_train/1-w": 0.03241557627916336, "rewards_train/2-2": 0.18671399354934692, "rewards_train/2-w": 0.004793869331479073, "rewards_train/accuracies": 0.375, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.027196669951081276, "rewards_train/margins_1": -0.1555657461285591, "rewards_train/margins_2": 0.18192012421786785, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -114.00776672363281, "logps_train/policy_1_l": -62.57463836669922, "logps_train/policy_1_w": -59.317203521728516, "logps_train/policy_2_2": -103.94587707519531, "logps_train/policy_2_w": -64.6264419555664, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -63.0, "logps_train/ref_1_w": -59.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -65.0, "rewards_train/1-2": 0.09922274947166443, "rewards_train/1-l": 0.04116902872920036, "rewards_train/1-w": 0.016326868906617165, "rewards_train/2-2": 0.10541225224733353, "rewards_train/2-w": 0.03169146552681923, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": -0.0248421598225832, "rewards_train/margins_1": -0.08289588056504726, "rewards_train/margins_2": 0.0737207867205143, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -140.68185424804688, "logps_train/policy_1_l": -189.1480712890625, "logps_train/policy_1_w": -118.67457580566406, "logps_train/policy_2_2": -118.16041564941406, "logps_train/policy_2_w": -141.29112243652344, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.08337700366973877, "rewards_train/1-l": 0.09808327257633209, "rewards_train/1-w": 0.2473866045475006, "rewards_train/2-2": 0.1378648430109024, "rewards_train/2-w": 0.1490129679441452, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.14930333197116852, "rewards_train/margins_1": 0.16400960087776184, "rewards_train/margins_2": -0.011148124933242798, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -177.72325134277344, "logps_train/policy_1_l": -236.1971893310547, "logps_train/policy_1_w": -148.47628784179688, "logps_train/policy_2_2": -153.169921875, "logps_train/policy_2_w": -173.64517211914062, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 0.1776747703552246, "rewards_train/1-l": 0.03262357413768768, "rewards_train/1-w": 0.1429966688156128, "rewards_train/2-2": 0.22363261878490448, "rewards_train/2-w": 0.14602932333946228, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.11037309467792511, "rewards_train/margins_1": -0.034678101539611816, "rewards_train/margins_2": 0.0776032954454422, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -214.3388671875, "logps_train/policy_1_l": -190.83203125, "logps_train/policy_1_w": -155.0267333984375, "logps_train/policy_2_2": -187.09356689453125, "logps_train/policy_2_w": -187.4450225830078, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.16220752894878387, "rewards_train/1-l": -0.02812519483268261, "rewards_train/1-w": 0.17545060813426971, "rewards_train/2-2": 0.20782987773418427, "rewards_train/2-w": 0.1586231291294098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.20357580296695232, "rewards_train/margins_1": 0.01324307918548584, "rewards_train/margins_2": 0.049206748604774475, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -175.18353271484375, "logps_train/policy_1_l": -152.77203369140625, "logps_train/policy_1_w": -139.48947143554688, "logps_train/policy_2_2": -148.89404296875, "logps_train/policy_2_w": -165.27072143554688, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.0003967229276895523, "rewards_train/1-l": 0.051305875182151794, "rewards_train/1-w": 0.17917728424072266, "rewards_train/2-2": 0.0855959877371788, "rewards_train/2-w": 0.04949023947119713, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.12787140905857086, "rewards_train/margins_1": 0.1787805613130331, "rewards_train/margins_2": 0.036105748265981674, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -167.47003173828125, "logps_train/policy_1_l": -211.865966796875, "logps_train/policy_1_w": -194.57083129882812, "logps_train/policy_2_2": -146.61703491210938, "logps_train/policy_2_w": -219.60540771484375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.1748715192079544, "rewards_train/1-l": -0.001048903912305832, "rewards_train/1-w": 0.2804172933101654, "rewards_train/2-2": 0.17735978960990906, "rewards_train/2-w": 0.28555309772491455, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.28146619722247124, "rewards_train/margins_1": 0.105545774102211, "rewards_train/margins_2": -0.1081933081150055, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -153.20494079589844, "logps_train/policy_1_l": -116.69633483886719, "logps_train/policy_1_w": -148.83108520507812, "logps_train/policy_2_2": -131.99636840820312, "logps_train/policy_2_w": -169.94314575195312, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.19200530648231506, "rewards_train/1-l": -0.03799273073673248, "rewards_train/1-w": 0.24657948315143585, "rewards_train/2-2": 0.22536200284957886, "rewards_train/2-w": 0.1775582730770111, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.28457221388816833, "rewards_train/margins_1": 0.05457417666912079, "rewards_train/margins_2": 0.04780372977256775, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -126.2037353515625, "logps_train/policy_1_l": -81.96675872802734, "logps_train/policy_1_w": -121.3655776977539, "logps_train/policy_2_2": -105.79832458496094, "logps_train/policy_2_w": -139.3807373046875, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 0.11712636798620224, "rewards_train/1-l": 0.005277073476463556, "rewards_train/1-w": 0.07906708121299744, "rewards_train/2-2": 0.14204254746437073, "rewards_train/2-w": -0.03182344511151314, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.07379000773653388, "rewards_train/margins_1": -0.0380592867732048, "rewards_train/margins_2": 0.17386599257588387, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -308.7889404296875, "logps_train/policy_1_l": -252.37828063964844, "logps_train/policy_1_w": -216.3975830078125, "logps_train/policy_2_2": -281.09197998046875, "logps_train/policy_2_w": -250.1798095703125, "logps_train/ref_1_2": -312.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -288.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 0.43048205971717834, "rewards_train/1-l": 0.0754527896642685, "rewards_train/1-w": 0.27274227142333984, "rewards_train/2-2": 0.5376778244972229, "rewards_train/2-w": 0.25701770186424255, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.19728948175907135, "rewards_train/margins_1": -0.1577397882938385, "rewards_train/margins_2": 0.28066012263298035, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -119.40104675292969, "logps_train/policy_1_l": -103.19485473632812, "logps_train/policy_1_w": -146.3577423095703, "logps_train/policy_2_2": -107.35333251953125, "logps_train/policy_2_w": -165.72390747070312, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.12395773828029633, "rewards_train/1-l": -0.0048370687291026115, "rewards_train/1-w": 0.20563305914402008, "rewards_train/2-2": 0.18654212355613708, "rewards_train/2-w": 0.17526507377624512, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.2104701278731227, "rewards_train/margins_1": 0.08167532086372375, "rewards_train/margins_2": 0.011277049779891968, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -145.6078643798828, "logps_train/policy_1_l": -106.8051986694336, "logps_train/policy_1_w": -155.18988037109375, "logps_train/policy_2_2": -121.47425842285156, "logps_train/policy_2_w": -191.23243713378906, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.18218281865119934, "rewards_train/1-l": 0.01064196228981018, "rewards_train/1-w": 0.2177293598651886, "rewards_train/2-2": 0.15804234147071838, "rewards_train/2-w": 0.09394426643848419, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.20708739757537842, "rewards_train/margins_1": 0.03554654121398926, "rewards_train/margins_2": 0.06409807503223419, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -94.19523620605469, "logps_train/policy_1_l": -89.07971954345703, "logps_train/policy_1_w": -72.76922607421875, "logps_train/policy_2_2": -77.9583969116211, "logps_train/policy_2_w": -86.33387756347656, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 0.15850333869457245, "rewards_train/1-l": -0.03062812238931656, "rewards_train/1-w": 0.12297921627759933, "rewards_train/2-2": 0.16988278925418854, "rewards_train/2-w": 0.1190531924366951, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.1536073386669159, "rewards_train/margins_1": -0.035524122416973114, "rewards_train/margins_2": 0.05082959681749344, "step": 11 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "loss": 1.8688, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -90.306884765625, "logps_train/policy_1_l": -100.04676055908203, "logps_train/policy_1_w": -113.50331115722656, "logps_train/policy_2_2": -76.42731475830078, "logps_train/policy_2_w": -141.40931701660156, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.24284754693508148, "rewards_train/1-l": -0.0057498998939991, "rewards_train/1-w": 0.43638837337493896, "rewards_train/2-2": 0.21175478398799896, "rewards_train/2-w": 0.22938194870948792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.44213827326893806, "rewards_train/margins_1": 0.19354082643985748, "rewards_train/margins_2": -0.017627164721488953, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -216.56402587890625, "logps_train/policy_1_l": -145.49343872070312, "logps_train/policy_1_w": -161.62814331054688, "logps_train/policy_2_2": -193.08001708984375, "logps_train/policy_2_w": -183.90164184570312, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.3623471260070801, "rewards_train/1-l": 0.08053968846797943, "rewards_train/1-w": 0.42361152172088623, "rewards_train/2-2": 0.4357466697692871, "rewards_train/2-w": 0.2754611372947693, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.3430718332529068, "rewards_train/margins_1": 0.06126439571380615, "rewards_train/margins_2": 0.16028553247451782, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -168.13449096679688, "logps_train/policy_1_l": -128.4370880126953, "logps_train/policy_1_w": -132.5531005859375, "logps_train/policy_2_2": -124.27072143554688, "logps_train/policy_2_w": -159.9315948486328, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.1693628877401352, "rewards_train/1-l": 0.02777513489127159, "rewards_train/1-w": 0.27125129103660583, "rewards_train/2-2": 0.17292767763137817, "rewards_train/2-w": 0.2287154197692871, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.24347615614533424, "rewards_train/margins_1": 0.10188840329647064, "rewards_train/margins_2": -0.055787742137908936, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -195.84872436523438, "logps_train/policy_1_l": -167.049072265625, "logps_train/policy_1_w": -172.52850341796875, "logps_train/policy_2_2": -174.15078735351562, "logps_train/policy_2_w": -205.59056091308594, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.38387784361839294, "rewards_train/1-l": 0.00017172656953334808, "rewards_train/1-w": 0.46199271082878113, "rewards_train/2-2": 0.298984169960022, "rewards_train/2-w": 0.4128187298774719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.4618209842592478, "rewards_train/margins_1": 0.07811486721038818, "rewards_train/margins_2": -0.11383455991744995, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -138.47470092773438, "logps_train/policy_1_l": -93.86638641357422, "logps_train/policy_1_w": -137.69699096679688, "logps_train/policy_2_2": -117.45197296142578, "logps_train/policy_2_w": -155.07432556152344, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.2775295674800873, "rewards_train/1-l": 0.01785413548350334, "rewards_train/1-w": 0.3662388026714325, "rewards_train/2-2": 0.31886494159698486, "rewards_train/2-w": 0.29881688952445984, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.34838466718792915, "rewards_train/margins_1": 0.08870923519134521, "rewards_train/margins_2": 0.020048052072525024, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -128.72964477539062, "logps_train/policy_1_l": -87.17668151855469, "logps_train/policy_1_w": -95.09010314941406, "logps_train/policy_2_2": -113.63858032226562, "logps_train/policy_2_w": -115.01467895507812, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": 0.24070800840854645, "rewards_train/1-l": -0.015715084969997406, "rewards_train/1-w": 0.3292711079120636, "rewards_train/2-2": 0.2714935839176178, "rewards_train/2-w": 0.27431362867355347, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.344986192882061, "rewards_train/margins_1": 0.08856309950351715, "rewards_train/margins_2": -0.002820044755935669, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -140.43887329101562, "logps_train/policy_1_l": -83.15711975097656, "logps_train/policy_1_w": -112.49451446533203, "logps_train/policy_2_2": -125.76463317871094, "logps_train/policy_2_w": -127.6072006225586, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.42994043231010437, "rewards_train/1-l": 0.05069439485669136, "rewards_train/1-w": 0.4581657648086548, "rewards_train/2-2": 0.422267347574234, "rewards_train/2-w": 0.3922092318534851, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.4074713699519634, "rewards_train/margins_1": 0.028225332498550415, "rewards_train/margins_2": 0.0300581157207489, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -87.80473327636719, "logps_train/policy_1_l": -42.0208740234375, "logps_train/policy_1_w": -77.29104614257812, "logps_train/policy_2_2": -74.56254577636719, "logps_train/policy_2_w": -99.40138244628906, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.11952652782201767, "rewards_train/1-l": -0.010485822334885597, "rewards_train/1-w": 0.16933318972587585, "rewards_train/2-2": 0.15077674388885498, "rewards_train/2-w": 0.1430651843547821, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 0.17981901206076145, "rewards_train/margins_1": 0.049806661903858185, "rewards_train/margins_2": 0.007711559534072876, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -151.66561889648438, "logps_train/policy_1_l": -94.33541870117188, "logps_train/policy_1_w": -109.24689483642578, "logps_train/policy_2_2": -126.20193481445312, "logps_train/policy_2_w": -132.61265563964844, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.4240629971027374, "rewards_train/1-l": 0.1012236475944519, "rewards_train/1-w": 0.26671624183654785, "rewards_train/2-2": 0.4407440423965454, "rewards_train/2-w": 0.20396935939788818, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.16549259424209595, "rewards_train/margins_1": -0.15734675526618958, "rewards_train/margins_2": 0.23677468299865723, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -149.79095458984375, "logps_train/policy_1_l": -208.77708435058594, "logps_train/policy_1_w": -143.56137084960938, "logps_train/policy_2_2": -128.61727905273438, "logps_train/policy_2_w": -177.39219665527344, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.04043593630194664, "rewards_train/1-l": 0.1043231263756752, "rewards_train/1-w": 0.4485512375831604, "rewards_train/2-2": 0.10975641757249832, "rewards_train/2-w": 0.39398330450057983, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 0.3442281112074852, "rewards_train/margins_1": 0.40811530128121376, "rewards_train/margins_2": -0.2842268869280815, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -157.0841064453125, "logps_train/policy_1_l": -165.61920166015625, "logps_train/policy_1_w": -120.12684631347656, "logps_train/policy_2_2": -132.8372039794922, "logps_train/policy_2_w": -143.08238220214844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.29471492767333984, "rewards_train/1-l": 0.03036540374159813, "rewards_train/1-w": 0.22559630870819092, "rewards_train/2-2": 0.4397168755531311, "rewards_train/2-w": 0.22535640001296997, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.1952309049665928, "rewards_train/margins_1": -0.06911861896514893, "rewards_train/margins_2": 0.21436047554016113, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -57.78178024291992, "logps_train/policy_1_l": -44.62439727783203, "logps_train/policy_1_w": -70.26434326171875, "logps_train/policy_2_2": -47.97504425048828, "logps_train/policy_2_w": -80.18855285644531, "logps_train/ref_1_2": -59.0, "logps_train/ref_1_l": -44.25, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -49.0, "logps_train/ref_2_w": -81.0, "rewards_train/1-2": 0.13041594624519348, "rewards_train/1-l": -0.022986842319369316, "rewards_train/1-w": 0.12356499582529068, "rewards_train/2-2": 0.10483908653259277, "rewards_train/2-w": 0.06981603801250458, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.14655183814466, "rewards_train/margins_1": -0.0068509504199028015, "rewards_train/margins_2": 0.035023048520088196, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -136.62322998046875, "logps_train/policy_1_l": -160.75460815429688, "logps_train/policy_1_w": -115.7517318725586, "logps_train/policy_2_2": -119.48245239257812, "logps_train/policy_2_w": -130.63449096679688, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.2361152619123459, "rewards_train/1-l": 0.028922151774168015, "rewards_train/1-w": 0.30490511655807495, "rewards_train/2-2": 0.2361297458410263, "rewards_train/2-w": 0.3072531819343567, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.27598296478390694, "rewards_train/margins_1": 0.06878985464572906, "rewards_train/margins_2": -0.07112343609333038, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -135.82223510742188, "logps_train/policy_1_l": -130.73794555664062, "logps_train/policy_1_w": -117.29891204833984, "logps_train/policy_2_2": -116.98600769042969, "logps_train/policy_2_w": -137.78939819335938, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 0.2084006816148758, "rewards_train/1-l": 0.024253450334072113, "rewards_train/1-w": 0.1263587772846222, "rewards_train/2-2": 0.24514904618263245, "rewards_train/2-w": 0.1429360955953598, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.10210532695055008, "rewards_train/margins_1": -0.0820419043302536, "rewards_train/margins_2": 0.10221295058727264, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -194.23193359375, "logps_train/policy_1_l": -209.92088317871094, "logps_train/policy_1_w": -158.75704956054688, "logps_train/policy_2_2": -161.3314208984375, "logps_train/policy_2_w": -188.6971435546875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.35493171215057373, "rewards_train/1-l": 0.14931748807430267, "rewards_train/1-w": 0.3586711883544922, "rewards_train/2-2": 0.4949822425842285, "rewards_train/2-w": 0.3396610617637634, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.20935370028018951, "rewards_train/margins_1": 0.003739476203918457, "rewards_train/margins_2": 0.1553211808204651, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -144.08297729492188, "logps_train/policy_1_l": -93.28341674804688, "logps_train/policy_1_w": -71.28767395019531, "logps_train/policy_2_2": -120.69100952148438, "logps_train/policy_2_w": -88.0600357055664, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.33232712745666504, "rewards_train/1-l": 0.011599699966609478, "rewards_train/1-w": 0.17904461920261383, "rewards_train/2-2": 0.3332430422306061, "rewards_train/2-w": 0.12094946950674057, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.16744491923600435, "rewards_train/margins_1": -0.1532825082540512, "rewards_train/margins_2": 0.2122935727238655, "step": 13 }, { "epoch": 0.04, "learning_rate": 2.058823529411765e-06, "loss": 1.7457, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -164.96353149414062, "logps_train/policy_1_l": -117.68223571777344, "logps_train/policy_1_w": -130.12680053710938, "logps_train/policy_2_2": -148.621826171875, "logps_train/policy_2_w": -142.53964233398438, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.605600118637085, "rewards_train/1-l": 0.12494005262851715, "rewards_train/1-w": 0.22315990924835205, "rewards_train/2-2": 0.6593015193939209, "rewards_train/2-w": 0.21439437568187714, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.0982198566198349, "rewards_train/margins_1": -0.3824402093887329, "rewards_train/margins_2": 0.44490714371204376, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -169.96371459960938, "logps_train/policy_1_l": -170.35012817382812, "logps_train/policy_1_w": -130.15420532226562, "logps_train/policy_2_2": -152.4987030029297, "logps_train/policy_2_w": -154.46664428710938, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.65519118309021, "rewards_train/1-l": 0.09233029186725616, "rewards_train/1-w": 0.5236412286758423, "rewards_train/2-2": 0.7235670685768127, "rewards_train/2-w": 0.45489880442619324, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.4313109368085861, "rewards_train/margins_1": -0.13154995441436768, "rewards_train/margins_2": 0.2686682641506195, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -169.33102416992188, "logps_train/policy_1_l": -192.111083984375, "logps_train/policy_1_w": -131.5635986328125, "logps_train/policy_2_2": -148.71063232421875, "logps_train/policy_2_w": -154.19113159179688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.6813503503799438, "rewards_train/1-l": 0.17443880438804626, "rewards_train/1-w": 0.46864092350006104, "rewards_train/2-2": 0.6711248755455017, "rewards_train/2-w": 0.3867459297180176, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.29420211911201477, "rewards_train/margins_1": -0.2127094268798828, "rewards_train/margins_2": 0.28437894582748413, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -134.5885009765625, "logps_train/policy_1_l": -132.16696166992188, "logps_train/policy_1_w": -120.27967834472656, "logps_train/policy_2_2": -118.47919464111328, "logps_train/policy_2_w": -138.80044555664062, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.4505254030227661, "rewards_train/1-l": 0.009866463020443916, "rewards_train/1-w": 0.4931260347366333, "rewards_train/2-2": 0.45520591735839844, "rewards_train/2-w": 0.33245426416397095, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.4832595717161894, "rewards_train/margins_1": 0.04260063171386719, "rewards_train/margins_2": 0.12275165319442749, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -176.9114990234375, "logps_train/policy_1_l": -179.74899291992188, "logps_train/policy_1_w": -153.59423828125, "logps_train/policy_2_2": -150.5365447998047, "logps_train/policy_2_w": -185.4136962890625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.3650999069213867, "rewards_train/1-l": 0.13447490334510803, "rewards_train/1-w": 0.37963950634002686, "rewards_train/2-2": 0.4275960922241211, "rewards_train/2-w": 0.3187865614891052, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.24516460299491882, "rewards_train/margins_1": 0.014539599418640137, "rewards_train/margins_2": 0.10880953073501587, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -174.33074951171875, "logps_train/policy_1_l": -155.36492919921875, "logps_train/policy_1_w": -173.2540283203125, "logps_train/policy_2_2": -151.70770263671875, "logps_train/policy_2_w": -206.78640747070312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 0.6294242739677429, "rewards_train/1-l": 0.06975777447223663, "rewards_train/1-w": 0.708972156047821, "rewards_train/2-2": 0.6573556661605835, "rewards_train/2-w": 0.4354212284088135, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.6392143815755844, "rewards_train/margins_1": 0.07954788208007812, "rewards_train/margins_2": 0.22193443775177002, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -128.92665100097656, "logps_train/policy_1_l": -108.35939025878906, "logps_train/policy_1_w": -149.1419677734375, "logps_train/policy_2_2": -108.73863220214844, "logps_train/policy_2_w": -174.48435974121094, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.23233510553836823, "rewards_train/1-l": -0.04726674407720566, "rewards_train/1-w": 0.8029903173446655, "rewards_train/2-2": 0.4230121374130249, "rewards_train/2-w": 0.7781261205673218, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.8502570614218712, "rewards_train/margins_1": 0.5706552118062973, "rewards_train/margins_2": -0.3551139831542969, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -170.80467224121094, "logps_train/policy_1_l": -210.5686492919922, "logps_train/policy_1_w": -149.9840087890625, "logps_train/policy_2_2": -145.81427001953125, "logps_train/policy_2_w": -189.95901489257812, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.5507825016975403, "rewards_train/1-l": 0.08053791522979736, "rewards_train/1-w": 0.5078498125076294, "rewards_train/2-2": 0.6466984152793884, "rewards_train/2-w": 0.416597843170166, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.42731189727783203, "rewards_train/margins_1": -0.04293268918991089, "rewards_train/margins_2": 0.2301005721092224, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -178.75405883789062, "logps_train/policy_1_l": -167.23330688476562, "logps_train/policy_1_w": -132.7965087890625, "logps_train/policy_2_2": -156.38897705078125, "logps_train/policy_2_w": -160.0430908203125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.5870935916900635, "rewards_train/1-l": 0.07451970875263214, "rewards_train/1-w": 0.5234734416007996, "rewards_train/2-2": 0.6079785823822021, "rewards_train/2-w": 0.47381627559661865, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.4489537328481674, "rewards_train/margins_1": -0.06362015008926392, "rewards_train/margins_2": 0.1341623067855835, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -89.15411376953125, "logps_train/policy_1_l": -79.29969024658203, "logps_train/policy_1_w": -87.68214416503906, "logps_train/policy_2_2": -81.551513671875, "logps_train/policy_2_w": -95.88516235351562, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.23771324753761292, "rewards_train/1-l": 0.042492207139730453, "rewards_train/1-w": 0.504245936870575, "rewards_train/2-2": 0.26281771063804626, "rewards_train/2-w": 0.48570218682289124, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.4617537297308445, "rewards_train/margins_1": 0.26653268933296204, "rewards_train/margins_2": -0.22288447618484497, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -202.26416015625, "logps_train/policy_1_l": -176.12930297851562, "logps_train/policy_1_w": -130.00726318359375, "logps_train/policy_2_2": -179.06727600097656, "logps_train/policy_2_w": -152.4793701171875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.5860841870307922, "rewards_train/1-l": 0.015195317566394806, "rewards_train/1-w": 0.29302388429641724, "rewards_train/2-2": 0.6307728290557861, "rewards_train/2-w": 0.3708135485649109, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.27782856673002243, "rewards_train/margins_1": -0.293060302734375, "rewards_train/margins_2": 0.25995928049087524, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -55.39781188964844, "logps_train/policy_1_l": -25.939884185791016, "logps_train/policy_1_w": -76.76577758789062, "logps_train/policy_2_2": -49.96318817138672, "logps_train/policy_2_w": -89.84552001953125, "logps_train/ref_1_2": -58.0, "logps_train/ref_1_l": -25.75, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -52.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.2551406919956207, "rewards_train/1-l": -0.009222769178450108, "rewards_train/1-w": 0.2501804232597351, "rewards_train/2-2": 0.2663767337799072, "rewards_train/2-w": 0.222479447722435, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.2594031924381852, "rewards_train/margins_1": -0.00496026873588562, "rewards_train/margins_2": 0.04389728605747223, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -192.13034057617188, "logps_train/policy_1_l": -174.69757080078125, "logps_train/policy_1_w": -179.05618286132812, "logps_train/policy_2_2": -168.3564453125, "logps_train/policy_2_w": -198.0587158203125, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.6697800159454346, "rewards_train/1-l": -0.07376017421483994, "rewards_train/1-w": 0.7811011075973511, "rewards_train/2-2": 0.5737312436103821, "rewards_train/2-w": 0.6074095964431763, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.854861281812191, "rewards_train/margins_1": 0.1113210916519165, "rewards_train/margins_2": -0.03367835283279419, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -221.38800048828125, "logps_train/policy_1_l": -140.1557159423828, "logps_train/policy_1_w": -170.31509399414062, "logps_train/policy_2_2": -193.57308959960938, "logps_train/policy_2_w": -197.859619140625, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.5854184627532959, "rewards_train/1-l": 0.04458431899547577, "rewards_train/1-w": 0.9114599823951721, "rewards_train/2-2": 0.5669090747833252, "rewards_train/2-w": 0.7484122514724731, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.8668756633996964, "rewards_train/margins_1": 0.3260415196418762, "rewards_train/margins_2": -0.18150317668914795, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -128.06289672851562, "logps_train/policy_1_l": -162.50555419921875, "logps_train/policy_1_w": -164.27914428710938, "logps_train/policy_2_2": -110.63407897949219, "logps_train/policy_2_w": -183.27757263183594, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.26910242438316345, "rewards_train/1-l": 0.1103816032409668, "rewards_train/1-w": 0.4970846176147461, "rewards_train/2-2": 0.32096728682518005, "rewards_train/2-w": 0.5534923076629639, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.3867030143737793, "rewards_train/margins_1": 0.22798219323158264, "rewards_train/margins_2": -0.2325250208377838, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -110.2044677734375, "logps_train/policy_1_l": -130.18011474609375, "logps_train/policy_1_w": -103.84503173828125, "logps_train/policy_2_2": -93.74468994140625, "logps_train/policy_2_w": -129.30445861816406, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.34175950288772583, "rewards_train/1-l": 0.0008364692330360413, "rewards_train/1-w": 0.47096550464630127, "rewards_train/2-2": 0.2613460123538971, "rewards_train/2-w": 0.40314725041389465, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.47012903541326523, "rewards_train/margins_1": 0.12920600175857544, "rewards_train/margins_2": -0.14180123805999756, "step": 15 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "loss": 1.5832, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -191.88143920898438, "logps_train/policy_1_l": -135.21441650390625, "logps_train/policy_1_w": -164.92120361328125, "logps_train/policy_2_2": -172.4396209716797, "logps_train/policy_2_w": -190.73464965820312, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.9868556261062622, "rewards_train/1-l": -0.058575574308633804, "rewards_train/1-w": 1.0894951820373535, "rewards_train/2-2": 1.1341626644134521, "rewards_train/2-w": 0.916378915309906, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.1480707563459873, "rewards_train/margins_1": 0.10263955593109131, "rewards_train/margins_2": 0.21778374910354614, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -174.03067016601562, "logps_train/policy_1_l": -164.73924255371094, "logps_train/policy_1_w": -125.70481872558594, "logps_train/policy_2_2": -153.13790893554688, "logps_train/policy_2_w": -140.59042358398438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.3695884048938751, "rewards_train/1-l": 0.05888812616467476, "rewards_train/1-w": 0.48615914583206177, "rewards_train/2-2": 0.47370898723602295, "rewards_train/2-w": 0.47377023100852966, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 0.427271019667387, "rewards_train/margins_1": 0.11657074093818665, "rewards_train/margins_2": -6.124377250671387e-05, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -180.42315673828125, "logps_train/policy_1_l": -152.01626586914062, "logps_train/policy_1_w": -100.00048828125, "logps_train/policy_2_2": -161.1021270751953, "logps_train/policy_2_w": -113.49790954589844, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.7424498796463013, "rewards_train/1-l": 0.16556131839752197, "rewards_train/1-w": 0.2546384930610657, "rewards_train/2-2": 0.8597092032432556, "rewards_train/2-w": 0.25020867586135864, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.0890771746635437, "rewards_train/margins_1": -0.4878113865852356, "rewards_train/margins_2": 0.609500527381897, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -109.46609497070312, "logps_train/policy_1_l": -116.61509704589844, "logps_train/policy_1_w": -132.54034423828125, "logps_train/policy_2_2": -91.65847778320312, "logps_train/policy_2_w": -160.33834838867188, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.3955773413181305, "rewards_train/1-l": 0.0029437243938446045, "rewards_train/1-w": 0.9240914583206177, "rewards_train/2-2": 0.3935280442237854, "rewards_train/2-w": 0.7411656975746155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.9211477339267731, "rewards_train/margins_1": 0.5285141170024872, "rewards_train/margins_2": -0.3476376533508301, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -132.7870330810547, "logps_train/policy_1_l": -118.53572845458984, "logps_train/policy_1_w": -143.51406860351562, "logps_train/policy_2_2": -112.0196533203125, "logps_train/policy_2_w": -161.21661376953125, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.2821851670742035, "rewards_train/1-l": 0.07430823147296906, "rewards_train/1-w": 0.6876554489135742, "rewards_train/2-2": 0.35892340540885925, "rewards_train/2-w": 0.6486521363258362, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.0, "rewards_train/margins": 0.6133472174406052, "rewards_train/margins_1": 0.4054702818393707, "rewards_train/margins_2": -0.28972873091697693, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -111.7526626586914, "logps_train/policy_1_l": -60.22371292114258, "logps_train/policy_1_w": -74.40137481689453, "logps_train/policy_2_2": -101.12030792236328, "logps_train/policy_2_w": -84.28485107421875, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -59.25, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.5079370737075806, "rewards_train/1-l": -0.0835040807723999, "rewards_train/1-w": 0.19345636665821075, "rewards_train/2-2": 0.6326956748962402, "rewards_train/2-w": 0.17932716012001038, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.27696044743061066, "rewards_train/margins_1": -0.3144807070493698, "rewards_train/margins_2": 0.45336851477622986, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -96.59587860107422, "logps_train/policy_1_l": -189.14833068847656, "logps_train/policy_1_w": -80.2860336303711, "logps_train/policy_2_2": -84.31172180175781, "logps_train/policy_2_w": -99.91561889648438, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 0.13455277681350708, "rewards_train/1-l": 0.0464949831366539, "rewards_train/1-w": 0.15919014811515808, "rewards_train/2-2": 0.18484413623809814, "rewards_train/2-w": 0.13109475374221802, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.11269516497850418, "rewards_train/margins_1": 0.024637371301651, "rewards_train/margins_2": 0.05374938249588013, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -228.20486450195312, "logps_train/policy_1_l": -203.92417907714844, "logps_train/policy_1_w": -175.55923461914062, "logps_train/policy_2_2": -199.44912719726562, "logps_train/policy_2_w": -212.04055786132812, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.1217012405395508, "rewards_train/1-l": -0.19241729378700256, "rewards_train/1-w": 1.3956387042999268, "rewards_train/2-2": 1.2871196269989014, "rewards_train/2-w": 1.174068570137024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.5880559980869293, "rewards_train/margins_1": 0.273937463760376, "rewards_train/margins_2": 0.11305105686187744, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -273.18756103515625, "logps_train/policy_1_l": -264.4315185546875, "logps_train/policy_1_w": -188.2294158935547, "logps_train/policy_2_2": -241.1199951171875, "logps_train/policy_2_w": -213.5960693359375, "logps_train/ref_1_2": -290.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.6922814846038818, "rewards_train/1-l": -0.07440219819545746, "rewards_train/1-w": 1.6094794273376465, "rewards_train/2-2": 1.9472788572311401, "rewards_train/2-w": 1.3704708814620972, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.683881625533104, "rewards_train/margins_1": -0.08280205726623535, "rewards_train/margins_2": 0.576807975769043, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -257.5032958984375, "logps_train/policy_1_l": -272.12152099609375, "logps_train/policy_1_w": -252.669677734375, "logps_train/policy_2_2": -219.43975830078125, "logps_train/policy_2_w": -291.2744140625, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -270.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 1.5496714115142822, "rewards_train/1-l": 0.200347900390625, "rewards_train/1-w": 1.795532464981079, "rewards_train/2-2": 1.6935245990753174, "rewards_train/2-w": 1.4225585460662842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.595184564590454, "rewards_train/margins_1": 0.24586105346679688, "rewards_train/margins_2": 0.2709660530090332, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -156.82501220703125, "logps_train/policy_1_l": -186.73757934570312, "logps_train/policy_1_w": -133.5727081298828, "logps_train/policy_2_2": -132.19581604003906, "logps_train/policy_2_w": -163.88955688476562, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.5901548862457275, "rewards_train/1-l": -0.3436792492866516, "rewards_train/1-w": 0.7567914724349976, "rewards_train/2-2": 0.6679180860519409, "rewards_train/2-w": 0.6243258118629456, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.1004707217216492, "rewards_train/margins_1": 0.16663658618927002, "rewards_train/margins_2": 0.04359227418899536, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -39.63433074951172, "logps_train/policy_1_l": -21.792943954467773, "logps_train/policy_1_w": -67.44794464111328, "logps_train/policy_2_2": -29.145038604736328, "logps_train/policy_2_w": -88.91587829589844, "logps_train/ref_1_2": -42.25, "logps_train/ref_1_l": -22.875, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -32.5, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 0.2701605558395386, "rewards_train/1-l": 0.10556891560554504, "rewards_train/1-w": 0.868145227432251, "rewards_train/2-2": 0.3456522822380066, "rewards_train/2-w": 0.6576303839683533, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.7625763118267059, "rewards_train/margins_1": 0.5979846715927124, "rewards_train/margins_2": -0.3119781017303467, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -222.19583129882812, "logps_train/policy_1_l": -196.02328491210938, "logps_train/policy_1_w": -170.5018768310547, "logps_train/policy_2_2": -199.61785888671875, "logps_train/policy_2_w": -213.3872528076172, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.2085429430007935, "rewards_train/1-l": -0.3624843955039978, "rewards_train/1-w": 1.5123121738433838, "rewards_train/2-2": 1.3475890159606934, "rewards_train/2-w": 1.5581501722335815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.8747965693473816, "rewards_train/margins_1": 0.30376923084259033, "rewards_train/margins_2": -0.21056115627288818, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -175.37637329101562, "logps_train/policy_1_l": -202.9580535888672, "logps_train/policy_1_w": -117.86785125732422, "logps_train/policy_2_2": -156.4920196533203, "logps_train/policy_2_w": -134.1096649169922, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.8545490503311157, "rewards_train/1-l": 0.004781361669301987, "rewards_train/1-w": 0.44641777873039246, "rewards_train/2-2": 0.9293137788772583, "rewards_train/2-w": 0.5374712944030762, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.44163641706109047, "rewards_train/margins_1": -0.40813127160072327, "rewards_train/margins_2": 0.39184248447418213, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -171.8105010986328, "logps_train/policy_1_l": -116.5873031616211, "logps_train/policy_1_w": -143.42950439453125, "logps_train/policy_2_2": -151.50918579101562, "logps_train/policy_2_w": -175.30010986328125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.1111373901367188, "rewards_train/1-l": -0.18451189994812012, "rewards_train/1-w": 1.1211107969284058, "rewards_train/2-2": 1.1482999324798584, "rewards_train/2-w": 1.022334098815918, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3056226968765259, "rewards_train/margins_1": 0.009973406791687012, "rewards_train/margins_2": 0.12596583366394043, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -105.61446380615234, "logps_train/policy_1_l": -97.73454284667969, "logps_train/policy_1_w": -98.39651489257812, "logps_train/policy_2_2": -93.33590698242188, "logps_train/policy_2_w": -110.6649398803711, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.28542858362197876, "rewards_train/1-l": -0.11440864205360413, "rewards_train/1-w": 0.5806612968444824, "rewards_train/2-2": 0.2828157842159271, "rewards_train/2-w": 0.5038178563117981, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.6950699388980865, "rewards_train/margins_1": 0.29523271322250366, "rewards_train/margins_2": -0.22100207209587097, "step": 17 }, { "epoch": 0.05, "learning_rate": 2.647058823529412e-06, "loss": 1.3814, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -144.62576293945312, "logps_train/policy_1_l": -131.30075073242188, "logps_train/policy_1_w": -137.9380645751953, "logps_train/policy_2_2": -123.87394714355469, "logps_train/policy_2_w": -165.86624145507812, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.428829550743103, "rewards_train/1-l": -0.09994915127754211, "rewards_train/1-w": 0.45150652527809143, "rewards_train/2-2": 0.4610421061515808, "rewards_train/2-w": 0.363377183675766, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.5514556765556335, "rewards_train/margins_1": 0.022676974534988403, "rewards_train/margins_2": 0.09766492247581482, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -165.05874633789062, "logps_train/policy_1_l": -78.91055297851562, "logps_train/policy_1_w": -118.12663269042969, "logps_train/policy_2_2": -139.8099822998047, "logps_train/policy_2_w": -147.07398986816406, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.0128753185272217, "rewards_train/1-l": -0.0012115822173655033, "rewards_train/1-w": 0.8826488256454468, "rewards_train/2-2": 1.3338464498519897, "rewards_train/2-w": 0.798069417476654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.8838604078628123, "rewards_train/margins_1": -0.1302264928817749, "rewards_train/margins_2": 0.5357770323753357, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -252.84317016601562, "logps_train/policy_1_l": -226.08993530273438, "logps_train/policy_1_w": -225.56361389160156, "logps_train/policy_2_2": -223.09445190429688, "logps_train/policy_2_w": -252.08343505859375, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 1.4836509227752686, "rewards_train/1-l": -0.13477599620819092, "rewards_train/1-w": 2.3057470321655273, "rewards_train/2-2": 1.7303977012634277, "rewards_train/2-w": 1.9533741474151611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4405230283737183, "rewards_train/margins_1": 0.8220961093902588, "rewards_train/margins_2": -0.2229764461517334, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -121.31100463867188, "logps_train/policy_1_l": -113.45816040039062, "logps_train/policy_1_w": -136.14706420898438, "logps_train/policy_2_2": -103.51876831054688, "logps_train/policy_2_w": -153.2994384765625, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.5157747268676758, "rewards_train/1-l": -0.2673979103565216, "rewards_train/1-w": 0.922012448310852, "rewards_train/2-2": 0.7168728113174438, "rewards_train/2-w": 0.9091181755065918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.1894103586673737, "rewards_train/margins_1": 0.40623772144317627, "rewards_train/margins_2": -0.19224536418914795, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -162.6143798828125, "logps_train/policy_1_l": -147.66024780273438, "logps_train/policy_1_w": -95.42575073242188, "logps_train/policy_2_2": -144.072998046875, "logps_train/policy_2_w": -101.39708709716797, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.5784065127372742, "rewards_train/1-l": -0.2332122027873993, "rewards_train/1-w": 0.1296902298927307, "rewards_train/2-2": 0.6341069936752319, "rewards_train/2-w": 0.17669695615768433, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.36290243268013, "rewards_train/margins_1": -0.44871628284454346, "rewards_train/margins_2": 0.4574100375175476, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -195.93081665039062, "logps_train/policy_1_l": -137.40078735351562, "logps_train/policy_1_w": -163.3732452392578, "logps_train/policy_2_2": -171.49899291992188, "logps_train/policy_2_w": -182.2297821044922, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.4885592460632324, "rewards_train/1-l": -0.09710916131734848, "rewards_train/1-w": 1.2626757621765137, "rewards_train/2-2": 1.5700223445892334, "rewards_train/2-w": 1.1332716941833496, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3597849234938622, "rewards_train/margins_1": -0.22588348388671875, "rewards_train/margins_2": 0.4367506504058838, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -169.73065185546875, "logps_train/policy_1_l": -120.93588256835938, "logps_train/policy_1_w": -100.55451965332031, "logps_train/policy_2_2": -145.4636688232422, "logps_train/policy_2_w": -117.94642639160156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.2417796850204468, "rewards_train/1-l": -0.10765031725168228, "rewards_train/1-w": 0.5250163078308105, "rewards_train/2-2": 1.2489453554153442, "rewards_train/2-w": 0.4045758545398712, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.6326666250824928, "rewards_train/margins_1": -0.7167633771896362, "rewards_train/margins_2": 0.844369500875473, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -162.26133728027344, "logps_train/policy_1_l": -83.0873031616211, "logps_train/policy_1_w": -106.9678955078125, "logps_train/policy_2_2": -140.29058837890625, "logps_train/policy_2_w": -124.72532653808594, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.2894902229309082, "rewards_train/1-l": -0.1858302652835846, "rewards_train/1-w": 1.0883657932281494, "rewards_train/2-2": 1.3939886093139648, "rewards_train/2-w": 1.0274670124053955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.274196058511734, "rewards_train/margins_1": -0.2011244297027588, "rewards_train/margins_2": 0.36652159690856934, "step": 18 }, { "epoch": 0.06, "logps_train/policy_1_2": -124.19042205810547, "logps_train/policy_1_l": -101.47900390625, "logps_train/policy_1_w": -142.66574096679688, "logps_train/policy_2_2": -97.47928619384766, "logps_train/policy_2_w": -173.80023193359375, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.4575199484825134, "rewards_train/1-l": -0.38149434328079224, "rewards_train/1-w": 1.6531517505645752, "rewards_train/2-2": 0.5137897729873657, "rewards_train/2-w": 1.36392343044281, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.0346460938453674, "rewards_train/margins_1": 1.1956318020820618, "rewards_train/margins_2": -0.8501336574554443, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -116.57504272460938, "logps_train/policy_1_l": -109.45762634277344, "logps_train/policy_1_w": -99.84562683105469, "logps_train/policy_2_2": -92.53535461425781, "logps_train/policy_2_w": -125.0888442993164, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.6440588235855103, "rewards_train/1-l": -0.08520308136940002, "rewards_train/1-w": 0.8779500722885132, "rewards_train/2-2": 0.9054492712020874, "rewards_train/2-w": 0.6622097492218018, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.9631531536579132, "rewards_train/margins_1": 0.23389124870300293, "rewards_train/margins_2": 0.24323952198028564, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -226.54348754882812, "logps_train/policy_1_l": -128.7094268798828, "logps_train/policy_1_w": -167.10873413085938, "logps_train/policy_2_2": -193.58486938476562, "logps_train/policy_2_w": -213.12576293945312, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.7104942798614502, "rewards_train/1-l": -0.5537554621696472, "rewards_train/1-w": 1.2649073600769043, "rewards_train/2-2": 1.6995208263397217, "rewards_train/2-w": 0.858516275882721, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8186628222465515, "rewards_train/margins_1": -0.4455869197845459, "rewards_train/margins_2": 0.8410045504570007, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -88.26033020019531, "logps_train/policy_1_l": -73.96534729003906, "logps_train/policy_1_w": -95.32278442382812, "logps_train/policy_2_2": -74.88465881347656, "logps_train/policy_2_w": -111.38172912597656, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.3380294442176819, "rewards_train/1-l": -0.17690551280975342, "rewards_train/1-w": 1.1239714622497559, "rewards_train/2-2": 0.36700332164764404, "rewards_train/2-w": 1.1790149211883545, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.3008769750595093, "rewards_train/margins_1": 0.785942018032074, "rewards_train/margins_2": -0.8120115995407104, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -171.83262634277344, "logps_train/policy_1_l": -142.31076049804688, "logps_train/policy_1_w": -88.92682647705078, "logps_train/policy_2_2": -140.06893920898438, "logps_train/policy_2_w": -107.4935073852539, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.4495491981506348, "rewards_train/1-l": -0.15517204999923706, "rewards_train/1-w": 0.5881767272949219, "rewards_train/2-2": 1.6399811506271362, "rewards_train/2-w": 0.5279932618141174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 0.7433487772941589, "rewards_train/margins_1": -0.8613724708557129, "rewards_train/margins_2": 1.1119878888130188, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -149.97683715820312, "logps_train/policy_1_l": -129.79588317871094, "logps_train/policy_1_w": -178.29820251464844, "logps_train/policy_2_2": -124.4946060180664, "logps_train/policy_2_w": -204.7112274169922, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.9960663318634033, "rewards_train/1-l": -0.3194318115711212, "rewards_train/1-w": 2.0311174392700195, "rewards_train/2-2": 1.0685076713562012, "rewards_train/2-w": 1.9804391860961914, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3505492508411407, "rewards_train/margins_1": 1.0350511074066162, "rewards_train/margins_2": -0.9119315147399902, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -113.36199951171875, "logps_train/policy_1_l": -81.19075012207031, "logps_train/policy_1_w": -62.24498748779297, "logps_train/policy_2_2": -96.58985900878906, "logps_train/policy_2_w": -73.38230895996094, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -77.5, "rewards_train/1-2": 0.40344905853271484, "rewards_train/1-l": -0.49387964606285095, "rewards_train/1-w": 0.6122198700904846, "rewards_train/2-2": 0.5802720785140991, "rewards_train/2-w": 0.418410062789917, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.1060995161533356, "rewards_train/margins_1": 0.20877081155776978, "rewards_train/margins_2": 0.16186201572418213, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -141.78131103515625, "logps_train/policy_1_l": -118.14059448242188, "logps_train/policy_1_w": -81.04265594482422, "logps_train/policy_2_2": -112.63811492919922, "logps_train/policy_2_w": -97.05291748046875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.5765575170516968, "rewards_train/1-l": -0.435348778963089, "rewards_train/1-w": 0.32659366726875305, "rewards_train/2-2": 0.820563793182373, "rewards_train/2-w": 0.17400459945201874, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.761942446231842, "rewards_train/margins_1": -0.24996384978294373, "rewards_train/margins_2": 0.6465591937303543, "step": 19 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "loss": 1.3293, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -147.61611938476562, "logps_train/policy_1_l": -141.81832885742188, "logps_train/policy_1_w": -123.29103088378906, "logps_train/policy_2_2": -127.1159439086914, "logps_train/policy_2_w": -143.69049072265625, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.702842116355896, "rewards_train/1-l": 0.21113654971122742, "rewards_train/1-w": 1.0771464109420776, "rewards_train/2-2": 0.8727807998657227, "rewards_train/2-w": 0.8520452976226807, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.8660098612308502, "rewards_train/margins_1": 0.37430429458618164, "rewards_train/margins_2": 0.020735502243041992, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -89.59528350830078, "logps_train/policy_1_l": -70.00181579589844, "logps_train/policy_1_w": -90.21430969238281, "logps_train/policy_2_2": -74.48759460449219, "logps_train/policy_2_w": -115.28770446777344, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.4482842683792114, "rewards_train/1-l": -0.23494645953178406, "rewards_train/1-w": 0.7410688400268555, "rewards_train/2-2": 0.48561614751815796, "rewards_train/2-w": 0.465760201215744, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.9760152995586395, "rewards_train/margins_1": 0.29278457164764404, "rewards_train/margins_2": 0.01985594630241394, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -235.23486328125, "logps_train/policy_1_l": -176.03042602539062, "logps_train/policy_1_w": -210.76954650878906, "logps_train/policy_2_2": -213.60110473632812, "logps_train/policy_2_w": -231.4189453125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.0702624320983887, "rewards_train/1-l": -0.42179232835769653, "rewards_train/1-w": 1.5042955875396729, "rewards_train/2-2": 1.4023910760879517, "rewards_train/2-w": 1.4081052541732788, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9260879158973694, "rewards_train/margins_1": 0.4340331554412842, "rewards_train/margins_2": -0.0057141780853271484, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -122.78520202636719, "logps_train/policy_1_l": -190.97808837890625, "logps_train/policy_1_w": -155.0978546142578, "logps_train/policy_2_2": -108.90835571289062, "logps_train/policy_2_w": -169.78138732910156, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0144485235214233, "rewards_train/1-l": -0.09898146241903305, "rewards_train/1-w": 0.8738086223602295, "rewards_train/2-2": 1.0427584648132324, "rewards_train/2-w": 0.7734240293502808, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.9727900847792625, "rewards_train/margins_1": -0.14063990116119385, "rewards_train/margins_2": 0.26933443546295166, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -112.1622543334961, "logps_train/policy_1_l": -99.89228820800781, "logps_train/policy_1_w": -112.80818176269531, "logps_train/policy_2_2": -94.50535583496094, "logps_train/policy_2_w": -152.06793212890625, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.4060402512550354, "rewards_train/1-l": -0.5950878262519836, "rewards_train/1-w": 1.2723078727722168, "rewards_train/2-2": 0.36899533867836, "rewards_train/2-w": 0.9799255132675171, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.8673956990242004, "rewards_train/margins_1": 0.8662676215171814, "rewards_train/margins_2": -0.6109301745891571, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -182.2371368408203, "logps_train/policy_1_l": -193.8363494873047, "logps_train/policy_1_w": -163.90701293945312, "logps_train/policy_2_2": -149.79925537109375, "logps_train/policy_2_w": -191.22238159179688, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.6755049228668213, "rewards_train/1-l": -0.9539483785629272, "rewards_train/1-w": 1.5967994928359985, "rewards_train/2-2": 0.7239810228347778, "rewards_train/2-w": 1.3179965019226074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.550747871398926, "rewards_train/margins_1": 0.9212945699691772, "rewards_train/margins_2": -0.5940154790878296, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -74.46234893798828, "logps_train/policy_1_l": -76.05781555175781, "logps_train/policy_1_w": -71.23318481445312, "logps_train/policy_2_2": -61.457115173339844, "logps_train/policy_2_w": -91.06661987304688, "logps_train/ref_1_2": -75.5, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -63.0, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 0.09399957209825516, "rewards_train/1-l": -0.15802741050720215, "rewards_train/1-w": 0.3424036502838135, "rewards_train/2-2": 0.13163241744041443, "rewards_train/2-w": 0.2749905288219452, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.5004310607910156, "rewards_train/margins_1": 0.24840407818555832, "rewards_train/margins_2": -0.14335811138153076, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -164.4608917236328, "logps_train/policy_1_l": -136.3335723876953, "logps_train/policy_1_w": -152.07974243164062, "logps_train/policy_2_2": -143.08619689941406, "logps_train/policy_2_w": -184.1809844970703, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.5460978746414185, "rewards_train/1-l": -0.4700758755207062, "rewards_train/1-w": 1.3295255899429321, "rewards_train/2-2": 0.6538795232772827, "rewards_train/2-w": 1.1287778615951538, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.7996014654636383, "rewards_train/margins_1": 0.7834277153015137, "rewards_train/margins_2": -0.4748983383178711, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -148.62620544433594, "logps_train/policy_1_l": -110.62461853027344, "logps_train/policy_1_w": -117.08901977539062, "logps_train/policy_2_2": -128.4787139892578, "logps_train/policy_2_w": -133.3080596923828, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.3631609678268433, "rewards_train/1-l": -0.5275021195411682, "rewards_train/1-w": 0.9059413075447083, "rewards_train/2-2": 1.681229591369629, "rewards_train/2-w": 0.6809127330780029, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.4334434270858765, "rewards_train/margins_1": -0.457219660282135, "rewards_train/margins_2": 1.000316858291626, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -224.49673461914062, "logps_train/policy_1_l": -224.71702575683594, "logps_train/policy_1_w": -174.07981872558594, "logps_train/policy_2_2": -190.61279296875, "logps_train/policy_2_w": -210.64512634277344, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.003452777862549, "rewards_train/1-l": -1.0498278141021729, "rewards_train/1-w": 1.716628074645996, "rewards_train/2-2": 2.0574710369110107, "rewards_train/2-w": 1.681971549987793, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.766455888748169, "rewards_train/margins_1": -0.28682470321655273, "rewards_train/margins_2": 0.3754994869232178, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -158.041015625, "logps_train/policy_1_l": -153.85130310058594, "logps_train/policy_1_w": -170.1798858642578, "logps_train/policy_2_2": -130.72836303710938, "logps_train/policy_2_w": -204.0013885498047, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.8189462423324585, "rewards_train/1-l": -0.44880199432373047, "rewards_train/1-w": 2.0632619857788086, "rewards_train/2-2": 1.162125587463379, "rewards_train/2-w": 1.7279870510101318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.512063980102539, "rewards_train/margins_1": 1.24431574344635, "rewards_train/margins_2": -0.5658614635467529, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -79.772705078125, "logps_train/policy_1_l": -58.55699157714844, "logps_train/policy_1_w": -36.338958740234375, "logps_train/policy_2_2": -66.81592559814453, "logps_train/policy_2_w": -42.22353744506836, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -39.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -44.25, "rewards_train/1-2": 0.8673586845397949, "rewards_train/1-l": -0.17396101355552673, "rewards_train/1-w": 0.3051665425300598, "rewards_train/2-2": 0.8356925845146179, "rewards_train/2-w": 0.21553683280944824, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.47912755608558655, "rewards_train/margins_1": -0.5621921420097351, "rewards_train/margins_2": 0.6201557517051697, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -147.6759033203125, "logps_train/policy_1_l": -96.98442077636719, "logps_train/policy_1_w": -118.26879119873047, "logps_train/policy_2_2": -125.49156188964844, "logps_train/policy_2_w": -138.61859130859375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3495972156524658, "rewards_train/1-l": -0.48477041721343994, "rewards_train/1-w": 1.419995903968811, "rewards_train/2-2": 1.273500680923462, "rewards_train/2-w": 1.170953631401062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.904766321182251, "rewards_train/margins_1": 0.07039868831634521, "rewards_train/margins_2": 0.1025470495223999, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -128.27418518066406, "logps_train/policy_1_l": -71.13673400878906, "logps_train/policy_1_w": -81.63705444335938, "logps_train/policy_2_2": -113.95407104492188, "logps_train/policy_2_w": -90.34516143798828, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 1.4280507564544678, "rewards_train/1-l": -0.33850282430648804, "rewards_train/1-w": 0.8862937688827515, "rewards_train/2-2": 1.386624813079834, "rewards_train/2-w": 0.8109915852546692, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.2247965931892395, "rewards_train/margins_1": -0.5417569875717163, "rewards_train/margins_2": 0.5756332278251648, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -179.61602783203125, "logps_train/policy_1_l": -163.22723388671875, "logps_train/policy_1_w": -165.48565673828125, "logps_train/policy_2_2": -150.55931091308594, "logps_train/policy_2_w": -206.51138305664062, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.7962101697921753, "rewards_train/1-l": -0.8706732392311096, "rewards_train/1-w": 1.4311221837997437, "rewards_train/2-2": 0.8706312775611877, "rewards_train/2-w": 1.0238622426986694, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3017954230308533, "rewards_train/margins_1": 0.6349120140075684, "rewards_train/margins_2": -0.1532309651374817, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -137.60214233398438, "logps_train/policy_1_l": -168.9893035888672, "logps_train/policy_1_w": -99.4310531616211, "logps_train/policy_2_2": -122.64116668701172, "logps_train/policy_2_w": -120.07669830322266, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.5100983381271362, "rewards_train/1-l": -0.6223679780960083, "rewards_train/1-w": 0.2131441831588745, "rewards_train/2-2": 0.43119555711746216, "rewards_train/2-w": 0.07045512646436691, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.8355121612548828, "rewards_train/margins_1": -0.2969541549682617, "rewards_train/margins_2": 0.36074043065309525, "step": 21 }, { "epoch": 0.07, "learning_rate": 3.2352941176470594e-06, "loss": 1.3026, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -232.2125701904297, "logps_train/policy_1_l": -164.54244995117188, "logps_train/policy_1_w": -160.56732177734375, "logps_train/policy_2_2": -203.60365295410156, "logps_train/policy_2_w": -190.83253479003906, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 2.0224921703338623, "rewards_train/1-l": -0.6128392219543457, "rewards_train/1-w": 1.1932671070098877, "rewards_train/2-2": 2.3615095615386963, "rewards_train/2-w": 0.9792472720146179, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8061063289642334, "rewards_train/margins_1": -0.8292250633239746, "rewards_train/margins_2": 1.3822622895240784, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -184.34359741210938, "logps_train/policy_1_l": -136.3348846435547, "logps_train/policy_1_w": -147.1059112548828, "logps_train/policy_2_2": -152.49557495117188, "logps_train/policy_2_w": -182.58966064453125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.2593902349472046, "rewards_train/1-l": -0.749308705329895, "rewards_train/1-w": 1.2237849235534668, "rewards_train/2-2": 1.5035675764083862, "rewards_train/2-w": 0.9191583395004272, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.9730936288833618, "rewards_train/margins_1": -0.03560531139373779, "rewards_train/margins_2": 0.584409236907959, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -213.46148681640625, "logps_train/policy_1_l": -142.20724487304688, "logps_train/policy_1_w": -129.832763671875, "logps_train/policy_2_2": -186.41415405273438, "logps_train/policy_2_w": -156.2178955078125, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.4804128408432007, "rewards_train/1-l": -0.3980690538883209, "rewards_train/1-w": 1.062036395072937, "rewards_train/2-2": 1.6835854053497314, "rewards_train/2-w": 0.8508674502372742, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.460105448961258, "rewards_train/margins_1": -0.41837644577026367, "rewards_train/margins_2": 0.8327179551124573, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -94.93157958984375, "logps_train/policy_1_l": -69.64456176757812, "logps_train/policy_1_w": -63.371490478515625, "logps_train/policy_2_2": -83.3070297241211, "logps_train/policy_2_w": -70.07662963867188, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -65.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -72.0, "rewards_train/1-2": 0.8689514398574829, "rewards_train/1-l": -0.4238312542438507, "rewards_train/1-w": 0.16753873229026794, "rewards_train/2-2": 0.9866802096366882, "rewards_train/2-w": 0.18032541871070862, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.5913699865341187, "rewards_train/margins_1": -0.701412707567215, "rewards_train/margins_2": 0.8063547909259796, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -142.44512939453125, "logps_train/policy_1_l": -87.46256256103516, "logps_train/policy_1_w": -75.81967163085938, "logps_train/policy_2_2": -121.23670959472656, "logps_train/policy_2_w": -94.30826568603516, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 0.8757998943328857, "rewards_train/1-l": -0.4560222029685974, "rewards_train/1-w": 0.4024079442024231, "rewards_train/2-2": 1.0157819986343384, "rewards_train/2-w": 0.42854824662208557, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.8584301471710205, "rewards_train/margins_1": -0.47339195013046265, "rewards_train/margins_2": 0.5872337520122528, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -127.81466674804688, "logps_train/policy_1_l": -123.45274353027344, "logps_train/policy_1_w": -106.02011108398438, "logps_train/policy_2_2": -110.8539810180664, "logps_train/policy_2_w": -120.81824493408203, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8372831344604492, "rewards_train/1-l": -0.6785271167755127, "rewards_train/1-w": 0.5428614020347595, "rewards_train/2-2": 0.8325710296630859, "rewards_train/2-w": 0.24864482879638672, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.2213885188102722, "rewards_train/margins_1": -0.2944217324256897, "rewards_train/margins_2": 0.5839262008666992, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -159.8966827392578, "logps_train/policy_1_l": -105.40967559814453, "logps_train/policy_1_w": -119.5119400024414, "logps_train/policy_2_2": -142.75985717773438, "logps_train/policy_2_w": -137.7212371826172, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.1447068452835083, "rewards_train/1-l": -0.7585455179214478, "rewards_train/1-w": 1.1523208618164062, "rewards_train/2-2": 1.613077163696289, "rewards_train/2-w": 1.0512163639068604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.910866379737854, "rewards_train/margins_1": 0.007614016532897949, "rewards_train/margins_2": 0.5618607997894287, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -185.94522094726562, "logps_train/policy_1_l": -165.24356079101562, "logps_train/policy_1_w": -242.14508056640625, "logps_train/policy_2_2": -166.28750610351562, "logps_train/policy_2_w": -261.53814697265625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 0.9972745776176453, "rewards_train/1-l": -0.8649802207946777, "rewards_train/1-w": 2.272991418838501, "rewards_train/2-2": 0.8747642040252686, "rewards_train/2-w": 1.7383718490600586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.1379716396331787, "rewards_train/margins_1": 1.2757168412208557, "rewards_train/margins_2": -0.86360764503479, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -223.74307250976562, "logps_train/policy_1_l": -127.56904602050781, "logps_train/policy_1_w": -135.8162384033203, "logps_train/policy_2_2": -198.28018188476562, "logps_train/policy_2_w": -158.54220581054688, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.5694429874420166, "rewards_train/1-l": -0.6358115077018738, "rewards_train/1-w": 1.0910329818725586, "rewards_train/2-2": 1.8602650165557861, "rewards_train/2-w": 0.7551555633544922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7268444895744324, "rewards_train/margins_1": -0.478410005569458, "rewards_train/margins_2": 1.105109453201294, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -179.69281005859375, "logps_train/policy_1_l": -230.19223022460938, "logps_train/policy_1_w": -206.43157958984375, "logps_train/policy_2_2": -157.695068359375, "logps_train/policy_2_w": -232.6234130859375, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.0330619812011719, "rewards_train/1-l": -1.4266949892044067, "rewards_train/1-w": 1.6927789449691772, "rewards_train/2-2": 1.367992877960205, "rewards_train/2-w": 1.575939655303955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.119473934173584, "rewards_train/margins_1": 0.6597169637680054, "rewards_train/margins_2": -0.20794677734375, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -222.88681030273438, "logps_train/policy_1_l": -122.341552734375, "logps_train/policy_1_w": -173.40386962890625, "logps_train/policy_2_2": -189.58551025390625, "logps_train/policy_2_w": -202.42507934570312, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.6519440412521362, "rewards_train/1-l": -0.7388423681259155, "rewards_train/1-w": 1.4385182857513428, "rewards_train/2-2": 2.0320749282836914, "rewards_train/2-w": 1.1278038024902344, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1773606538772583, "rewards_train/margins_1": -0.21342575550079346, "rewards_train/margins_2": 0.904271125793457, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -136.22238159179688, "logps_train/policy_1_l": -150.49090576171875, "logps_train/policy_1_w": -121.41211700439453, "logps_train/policy_2_2": -116.08130645751953, "logps_train/policy_2_w": -140.2367706298828, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.6293247938156128, "rewards_train/1-l": -0.9772161245346069, "rewards_train/1-w": 0.9560535550117493, "rewards_train/2-2": 0.6965560913085938, "rewards_train/2-w": 0.8239795565605164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9332696795463562, "rewards_train/margins_1": 0.3267287611961365, "rewards_train/margins_2": -0.1274234652519226, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -128.9925079345703, "logps_train/policy_1_l": -129.83177185058594, "logps_train/policy_1_w": -159.86141967773438, "logps_train/policy_2_2": -110.12882232666016, "logps_train/policy_2_w": -199.00144958496094, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.42574965953826904, "rewards_train/1-l": -0.7497788667678833, "rewards_train/1-w": 1.3982317447662354, "rewards_train/2-2": 0.6418052911758423, "rewards_train/2-w": 0.8561055660247803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.1480106115341187, "rewards_train/margins_1": 0.9724820852279663, "rewards_train/margins_2": -0.214300274848938, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -175.96754455566406, "logps_train/policy_1_l": -189.05813598632812, "logps_train/policy_1_w": -128.1973419189453, "logps_train/policy_2_2": -151.7019500732422, "logps_train/policy_2_w": -162.96597290039062, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.2227773666381836, "rewards_train/1-l": -1.1937049627304077, "rewards_train/1-w": 0.9622968435287476, "rewards_train/2-2": 1.3305859565734863, "rewards_train/2-w": 0.7479331493377686, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1560018062591553, "rewards_train/margins_1": -0.26048052310943604, "rewards_train/margins_2": 0.5826528072357178, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -248.87867736816406, "logps_train/policy_1_l": -188.1993408203125, "logps_train/policy_1_w": -199.25892639160156, "logps_train/policy_2_2": -216.23663330078125, "logps_train/policy_2_w": -231.80694580078125, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.180882453918457, "rewards_train/1-l": -0.9636849164962769, "rewards_train/1-w": 2.089733600616455, "rewards_train/2-2": 2.210712432861328, "rewards_train/2-w": 1.5130560398101807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.053418517112732, "rewards_train/margins_1": -0.09114885330200195, "rewards_train/margins_2": 0.6976563930511475, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -192.93557739257812, "logps_train/policy_1_l": -241.90390014648438, "logps_train/policy_1_w": -166.06643676757812, "logps_train/policy_2_2": -173.27734375, "logps_train/policy_2_w": -188.35391235351562, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.3564411401748657, "rewards_train/1-l": -1.3239845037460327, "rewards_train/1-w": 2.0636684894561768, "rewards_train/2-2": 1.791015863418579, "rewards_train/2-w": 2.0614843368530273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.3876529932022095, "rewards_train/margins_1": 0.707227349281311, "rewards_train/margins_2": -0.27046847343444824, "step": 23 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "loss": 1.1656, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -148.9499969482422, "logps_train/policy_1_l": -147.09591674804688, "logps_train/policy_1_w": -168.62464904785156, "logps_train/policy_2_2": -137.34494018554688, "logps_train/policy_2_w": -193.7506103515625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.052266240119934, "rewards_train/1-l": -0.6267789006233215, "rewards_train/1-w": 1.5640978813171387, "rewards_train/2-2": 0.9908975958824158, "rewards_train/2-w": 1.1397829055786133, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.19087678194046, "rewards_train/margins_1": 0.5118316411972046, "rewards_train/margins_2": -0.1488853096961975, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -165.75083923339844, "logps_train/policy_1_l": -127.4181137084961, "logps_train/policy_1_w": -153.2738037109375, "logps_train/policy_2_2": -136.79244995117188, "logps_train/policy_2_w": -187.12002563476562, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.1190567016601562, "rewards_train/1-l": -1.0676900148391724, "rewards_train/1-w": 1.113243818283081, "rewards_train/2-2": 1.2571808099746704, "rewards_train/2-w": 1.147372841835022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.1809338331222534, "rewards_train/margins_1": -0.005812883377075195, "rewards_train/margins_2": 0.10980796813964844, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -183.28036499023438, "logps_train/policy_1_l": -156.71791076660156, "logps_train/policy_1_w": -174.63136291503906, "logps_train/policy_2_2": -161.2232666015625, "logps_train/policy_2_w": -207.73410034179688, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.7563378810882568, "rewards_train/1-l": -1.4624152183532715, "rewards_train/1-w": 1.1806138753890991, "rewards_train/2-2": 0.9151742458343506, "rewards_train/2-w": 0.8500275611877441, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.6430290937423706, "rewards_train/margins_1": 0.4242759943008423, "rewards_train/margins_2": 0.06514668464660645, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -160.82891845703125, "logps_train/policy_1_l": -136.49583435058594, "logps_train/policy_1_w": -180.58059692382812, "logps_train/policy_2_2": -139.0696258544922, "logps_train/policy_2_w": -212.61692810058594, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.4405447244644165, "rewards_train/1-l": -0.891185462474823, "rewards_train/1-w": 2.3950650691986084, "rewards_train/2-2": 1.4821001291275024, "rewards_train/2-w": 2.0211193561553955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.2862505316734314, "rewards_train/margins_1": 0.9545203447341919, "rewards_train/margins_2": -0.5390192270278931, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -147.3538055419922, "logps_train/policy_1_l": -159.1665802001953, "logps_train/policy_1_w": -159.28909301757812, "logps_train/policy_2_2": -129.69580078125, "logps_train/policy_2_w": -183.2630157470703, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.7876670360565186, "rewards_train/1-l": -1.396052598953247, "rewards_train/1-w": 1.3947242498397827, "rewards_train/2-2": 1.1749500036239624, "rewards_train/2-w": 1.0286790132522583, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.79077684879303, "rewards_train/margins_1": 0.6070572137832642, "rewards_train/margins_2": 0.1462709903717041, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -137.14678955078125, "logps_train/policy_1_l": -197.0419158935547, "logps_train/policy_1_w": -95.28324890136719, "logps_train/policy_2_2": -112.12388610839844, "logps_train/policy_2_w": -119.81327056884766, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.497821569442749, "rewards_train/1-l": -0.582951545715332, "rewards_train/1-w": 0.688667893409729, "rewards_train/2-2": 0.8161271810531616, "rewards_train/2-w": 0.9327352643013, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.271619439125061, "rewards_train/margins_1": 0.19084632396697998, "rewards_train/margins_2": -0.11660808324813843, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -105.73333740234375, "logps_train/policy_1_l": -91.05355834960938, "logps_train/policy_1_w": -92.40695190429688, "logps_train/policy_2_2": -93.55712890625, "logps_train/policy_2_w": -113.50999450683594, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 0.14072856307029724, "rewards_train/1-l": -0.4481292963027954, "rewards_train/1-w": -0.019795849919319153, "rewards_train/2-2": 0.023193784058094025, "rewards_train/2-w": -0.14553004503250122, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.42833344638347626, "rewards_train/margins_1": -0.1605244129896164, "rewards_train/margins_2": 0.16872382909059525, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -230.38751220703125, "logps_train/policy_1_l": -212.47264099121094, "logps_train/policy_1_w": -173.40994262695312, "logps_train/policy_2_2": -200.8632354736328, "logps_train/policy_2_w": -201.73214721679688, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.7565608024597168, "rewards_train/1-l": -1.3410135507583618, "rewards_train/1-w": 1.6746315956115723, "rewards_train/2-2": 1.9074267148971558, "rewards_train/2-w": 1.4174104928970337, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.015645146369934, "rewards_train/margins_1": -0.08192920684814453, "rewards_train/margins_2": 0.49001622200012207, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -151.7023162841797, "logps_train/policy_1_l": -130.01547241210938, "logps_train/policy_1_w": -155.742431640625, "logps_train/policy_2_2": -137.38226318359375, "logps_train/policy_2_w": -177.76339721679688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.4407062530517578, "rewards_train/1-l": -1.1536965370178223, "rewards_train/1-w": 1.3351311683654785, "rewards_train/2-2": 1.48482084274292, "rewards_train/2-w": 0.9924107789993286, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.488827705383301, "rewards_train/margins_1": -0.1055750846862793, "rewards_train/margins_2": 0.4924100637435913, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -195.6812744140625, "logps_train/policy_1_l": -226.75001525878906, "logps_train/policy_1_w": -172.18661499023438, "logps_train/policy_2_2": -172.55149841308594, "logps_train/policy_2_w": -204.99981689453125, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.2756218910217285, "rewards_train/1-l": -2.1617202758789062, "rewards_train/1-w": 1.475479006767273, "rewards_train/2-2": 2.1073503494262695, "rewards_train/2-w": 1.2047065496444702, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.637199282646179, "rewards_train/margins_1": -0.8001428842544556, "rewards_train/margins_2": 0.9026437997817993, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -160.84075927734375, "logps_train/policy_1_l": -101.11752319335938, "logps_train/policy_1_w": -115.59320831298828, "logps_train/policy_2_2": -140.89495849609375, "logps_train/policy_2_w": -143.28701782226562, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.18623685836792, "rewards_train/1-l": -1.4127287864685059, "rewards_train/1-w": 0.8582570552825928, "rewards_train/2-2": 1.5323796272277832, "rewards_train/2-w": 0.8271576762199402, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2709858417510986, "rewards_train/margins_1": -0.32797980308532715, "rewards_train/margins_2": 0.705221951007843, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -128.27352905273438, "logps_train/policy_1_l": -179.0310821533203, "logps_train/policy_1_w": -167.24337768554688, "logps_train/policy_2_2": -112.9010238647461, "logps_train/policy_2_w": -175.46249389648438, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.10702276229858398, "rewards_train/1-l": -2.0935380458831787, "rewards_train/1-w": 1.1444127559661865, "rewards_train/2-2": 0.47708481550216675, "rewards_train/2-w": 1.309999942779541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.2379508018493652, "rewards_train/margins_1": 1.0373899936676025, "rewards_train/margins_2": -0.8329151272773743, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -113.30174255371094, "logps_train/policy_1_l": -106.3223876953125, "logps_train/policy_1_w": -108.2913818359375, "logps_train/policy_2_2": -90.4591293334961, "logps_train/policy_2_w": -136.6475372314453, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.3825206756591797, "rewards_train/1-l": -0.11422126740217209, "rewards_train/1-w": 0.9934694170951843, "rewards_train/2-2": 0.6932470798492432, "rewards_train/2-w": 0.9571211338043213, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.1076906844973564, "rewards_train/margins_1": 0.6109487414360046, "rewards_train/margins_2": -0.2638740539550781, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -198.000244140625, "logps_train/policy_1_l": -159.2831573486328, "logps_train/policy_1_w": -145.28414916992188, "logps_train/policy_2_2": -180.721435546875, "logps_train/policy_2_w": -158.21517944335938, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.9132574796676636, "rewards_train/1-l": -0.9712353348731995, "rewards_train/1-w": 0.3739297688007355, "rewards_train/2-2": 1.1337143182754517, "rewards_train/2-w": 0.4093407094478607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.345165103673935, "rewards_train/margins_1": -0.5393277108669281, "rewards_train/margins_2": 0.7243736088275909, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -264.549560546875, "logps_train/policy_1_l": -168.58047485351562, "logps_train/policy_1_w": -150.98342895507812, "logps_train/policy_2_2": -212.3341064453125, "logps_train/policy_2_w": -185.37808227539062, "logps_train/ref_1_2": -278.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.295043706893921, "rewards_train/1-l": -1.5161521434783936, "rewards_train/1-w": 2.030270576477051, "rewards_train/2-2": 2.1322154998779297, "rewards_train/2-w": 1.827817440032959, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5464227199554443, "rewards_train/margins_1": 0.7352268695831299, "rewards_train/margins_2": 0.3043980598449707, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -220.05911254882812, "logps_train/policy_1_l": -169.5958251953125, "logps_train/policy_1_w": -129.48193359375, "logps_train/policy_2_2": -177.86492919921875, "logps_train/policy_2_w": -173.37887573242188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.228462815284729, "rewards_train/1-l": -1.98430335521698, "rewards_train/1-w": 1.070054292678833, "rewards_train/2-2": 1.8244445323944092, "rewards_train/2-w": 0.8599267601966858, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.054357647895813, "rewards_train/margins_1": -0.158408522605896, "rewards_train/margins_2": 0.9645177721977234, "step": 25 }, { "epoch": 0.08, "learning_rate": 3.8235294117647055e-06, "loss": 1.3311, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -157.42086791992188, "logps_train/policy_1_l": -136.3449249267578, "logps_train/policy_1_w": -121.20208740234375, "logps_train/policy_2_2": -134.69857788085938, "logps_train/policy_2_w": -139.9554443359375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.2368199825286865, "rewards_train/1-l": -0.9098830223083496, "rewards_train/1-w": 0.5797914266586304, "rewards_train/2-2": 1.052408218383789, "rewards_train/2-w": 0.5650020837783813, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.48967444896698, "rewards_train/margins_1": -0.6570285558700562, "rewards_train/margins_2": 0.4874061346054077, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -196.6791229248047, "logps_train/policy_1_l": -208.38595581054688, "logps_train/policy_1_w": -176.13833618164062, "logps_train/policy_2_2": -180.63290405273438, "logps_train/policy_2_w": -194.38385009765625, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.5477131605148315, "rewards_train/1-l": -2.449532985687256, "rewards_train/1-w": 2.001790761947632, "rewards_train/2-2": 1.4679603576660156, "rewards_train/2-w": 1.8756765127182007, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.451323747634888, "rewards_train/margins_1": 0.4540776014328003, "rewards_train/margins_2": -0.40771615505218506, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -165.64088439941406, "logps_train/policy_1_l": -158.74945068359375, "logps_train/policy_1_w": -151.83087158203125, "logps_train/policy_2_2": -153.86822509765625, "logps_train/policy_2_w": -165.36459350585938, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.3757553100585938, "rewards_train/1-l": -1.4698684215545654, "rewards_train/1-w": 0.8856627941131592, "rewards_train/2-2": 1.2389588356018066, "rewards_train/2-w": 0.7401031851768494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3555312156677246, "rewards_train/margins_1": -0.49009251594543457, "rewards_train/margins_2": 0.4988556504249573, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -122.71672058105469, "logps_train/policy_1_l": -139.21775817871094, "logps_train/policy_1_w": -132.7540740966797, "logps_train/policy_2_2": -106.2005844116211, "logps_train/policy_2_w": -156.62179565429688, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.20645293593406677, "rewards_train/1-l": -1.8192857503890991, "rewards_train/1-w": 0.47669264674186707, "rewards_train/2-2": 0.1627541482448578, "rewards_train/2-w": 0.18751566112041473, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.295978397130966, "rewards_train/margins_1": 0.2702397108078003, "rewards_train/margins_2": -0.024761512875556946, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -196.54942321777344, "logps_train/policy_1_l": -187.07667541503906, "logps_train/policy_1_w": -186.54287719726562, "logps_train/policy_2_2": -165.57345581054688, "logps_train/policy_2_w": -226.4033966064453, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 0.8200571537017822, "rewards_train/1-l": -1.4631373882293701, "rewards_train/1-w": 1.7957115173339844, "rewards_train/2-2": 1.2489044666290283, "rewards_train/2-w": 1.5721611976623535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2588489055633545, "rewards_train/margins_1": 0.9756543636322021, "rewards_train/margins_2": -0.3232567310333252, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -217.03819274902344, "logps_train/policy_1_l": -194.58985900878906, "logps_train/policy_1_w": -141.82650756835938, "logps_train/policy_2_2": -182.86541748046875, "logps_train/policy_2_w": -166.03138732910156, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.6836810111999512, "rewards_train/1-l": -1.5152366161346436, "rewards_train/1-w": 1.8435204029083252, "rewards_train/2-2": 2.269707679748535, "rewards_train/2-w": 1.5316262245178223, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3587570190429688, "rewards_train/margins_1": 0.15983939170837402, "rewards_train/margins_2": 0.7380814552307129, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -206.84603881835938, "logps_train/policy_1_l": -268.1994323730469, "logps_train/policy_1_w": -183.99761962890625, "logps_train/policy_2_2": -180.62051391601562, "logps_train/policy_2_w": -214.78919982910156, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.7903952598571777, "rewards_train/1-l": -2.7511940002441406, "rewards_train/1-w": 1.303361415863037, "rewards_train/2-2": 1.1129487752914429, "rewards_train/2-w": 0.7460802793502808, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.054555416107178, "rewards_train/margins_1": 0.5129661560058594, "rewards_train/margins_2": 0.3668684959411621, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -223.65216064453125, "logps_train/policy_1_l": -210.29568481445312, "logps_train/policy_1_w": -205.94615173339844, "logps_train/policy_2_2": -198.68771362304688, "logps_train/policy_2_w": -233.365966796875, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.247285008430481, "rewards_train/1-l": -2.003202438354492, "rewards_train/1-w": 1.8819477558135986, "rewards_train/2-2": 1.7031025886535645, "rewards_train/2-w": 1.7493406534194946, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.885150194168091, "rewards_train/margins_1": 0.6346627473831177, "rewards_train/margins_2": -0.046238064765930176, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -99.91272735595703, "logps_train/policy_1_l": -104.15032958984375, "logps_train/policy_1_w": -84.71205139160156, "logps_train/policy_2_2": -87.21036529541016, "logps_train/policy_2_w": -99.33697509765625, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 0.972399115562439, "rewards_train/1-l": -0.7732366323471069, "rewards_train/1-w": 0.3709823489189148, "rewards_train/2-2": 1.1191980838775635, "rewards_train/2-w": 0.08036580681800842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.1442189812660217, "rewards_train/margins_1": -0.6014167666435242, "rewards_train/margins_2": 1.038832277059555, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -216.31829833984375, "logps_train/policy_1_l": -187.48843383789062, "logps_train/policy_1_w": -155.6820068359375, "logps_train/policy_2_2": -185.36203002929688, "logps_train/policy_2_w": -193.10818481445312, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2806696891784668, "rewards_train/1-l": -1.9261879920959473, "rewards_train/1-w": 1.3806264400482178, "rewards_train/2-2": 1.3403598070144653, "rewards_train/2-w": 0.8688691854476929, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.306814432144165, "rewards_train/margins_1": 0.09995675086975098, "rewards_train/margins_2": 0.47149062156677246, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -189.40447998046875, "logps_train/policy_1_l": -132.85325622558594, "logps_train/policy_1_w": -165.64669799804688, "logps_train/policy_2_2": -160.26058959960938, "logps_train/policy_2_w": -190.56341552734375, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.2095508575439453, "rewards_train/1-l": -1.1337629556655884, "rewards_train/1-w": 1.2665810585021973, "rewards_train/2-2": 1.6426916122436523, "rewards_train/2-w": 1.2561578750610352, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.4003440141677856, "rewards_train/margins_1": 0.05703020095825195, "rewards_train/margins_2": 0.3865337371826172, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -222.02581787109375, "logps_train/policy_1_l": -159.47572326660156, "logps_train/policy_1_w": -161.45384216308594, "logps_train/policy_2_2": -194.202880859375, "logps_train/policy_2_w": -202.44674682617188, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.7145073413848877, "rewards_train/1-l": -1.602064847946167, "rewards_train/1-w": 1.670143723487854, "rewards_train/2-2": 1.7861566543579102, "rewards_train/2-w": 1.187551736831665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.272208571434021, "rewards_train/margins_1": -0.04436361789703369, "rewards_train/margins_2": 0.5986049175262451, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -152.13380432128906, "logps_train/policy_1_l": -233.22164916992188, "logps_train/policy_1_w": -160.222412109375, "logps_train/policy_2_2": -126.65083312988281, "logps_train/policy_2_w": -190.59579467773438, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.7088366150856018, "rewards_train/1-l": -2.3713836669921875, "rewards_train/1-w": 1.8136968612670898, "rewards_train/2-2": 0.6377492547035217, "rewards_train/2-w": 1.3185458183288574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.185080528259277, "rewards_train/margins_1": 1.104860246181488, "rewards_train/margins_2": -0.6807965636253357, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -200.58087158203125, "logps_train/policy_1_l": -142.75421142578125, "logps_train/policy_1_w": -133.31918334960938, "logps_train/policy_2_2": -177.75477600097656, "logps_train/policy_2_w": -153.70660400390625, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.634880542755127, "rewards_train/1-l": -1.6215147972106934, "rewards_train/1-w": 0.9633934497833252, "rewards_train/2-2": 2.012803316116333, "rewards_train/2-w": 0.7434019446372986, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5849082469940186, "rewards_train/margins_1": -0.6714870929718018, "rewards_train/margins_2": 1.2694013714790344, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -193.18841552734375, "logps_train/policy_1_l": -155.74058532714844, "logps_train/policy_1_w": -166.1044464111328, "logps_train/policy_2_2": -166.63218688964844, "logps_train/policy_2_w": -191.7759552001953, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.590533971786499, "rewards_train/1-l": -1.3287463188171387, "rewards_train/1-w": 1.3598681688308716, "rewards_train/2-2": 1.9258434772491455, "rewards_train/2-w": 1.1317789554595947, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6886144876480103, "rewards_train/margins_1": -0.23066580295562744, "rewards_train/margins_2": 0.7940645217895508, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -248.3377227783203, "logps_train/policy_1_l": -193.27218627929688, "logps_train/policy_1_w": -180.42938232421875, "logps_train/policy_2_2": -205.91709899902344, "logps_train/policy_2_w": -231.5675048828125, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 1.541226863861084, "rewards_train/1-l": -2.3594443798065186, "rewards_train/1-w": 2.0883121490478516, "rewards_train/2-2": 2.211416006088257, "rewards_train/2-w": 1.183874487876892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.44775652885437, "rewards_train/margins_1": 0.5470852851867676, "rewards_train/margins_2": 1.0275415182113647, "step": 27 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "loss": 1.2097, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -125.70258331298828, "logps_train/policy_1_l": -111.78266906738281, "logps_train/policy_1_w": -95.55030822753906, "logps_train/policy_2_2": -108.95913696289062, "logps_train/policy_2_w": -120.500244140625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.8475152254104614, "rewards_train/1-l": -1.3957475423812866, "rewards_train/1-w": 0.8176738023757935, "rewards_train/2-2": 0.7611172199249268, "rewards_train/2-w": 0.6212645769119263, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.21342134475708, "rewards_train/margins_1": -0.02984142303466797, "rewards_train/margins_2": 0.1398526430130005, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -108.14833068847656, "logps_train/policy_1_l": -110.43763732910156, "logps_train/policy_1_w": -99.25240325927734, "logps_train/policy_2_2": -92.30073547363281, "logps_train/policy_2_w": -113.94369506835938, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.8640731573104858, "rewards_train/1-l": -1.0599743127822876, "rewards_train/1-w": -0.044771596789360046, "rewards_train/2-2": 1.09414541721344, "rewards_train/2-w": 0.032973676919937134, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.0152027159929276, "rewards_train/margins_1": -0.9088447540998459, "rewards_train/margins_2": 1.0611717402935028, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -143.08425903320312, "logps_train/policy_1_l": -172.07098388671875, "logps_train/policy_1_w": -126.18624877929688, "logps_train/policy_2_2": -128.04226684570312, "logps_train/policy_2_w": -146.8340301513672, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.22594866156578064, "rewards_train/1-l": -1.2830748558044434, "rewards_train/1-w": 1.2485625743865967, "rewards_train/2-2": 0.5613975524902344, "rewards_train/2-w": 1.0009715557098389, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.53163743019104, "rewards_train/margins_1": 1.022613912820816, "rewards_train/margins_2": -0.4395740032196045, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -198.02798461914062, "logps_train/policy_1_l": -196.77255249023438, "logps_train/policy_1_w": -143.073486328125, "logps_train/policy_2_2": -168.673095703125, "logps_train/policy_2_w": -169.83535766601562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.096421480178833, "rewards_train/1-l": -2.491708278656006, "rewards_train/1-w": 1.5633553266525269, "rewards_train/2-2": 1.339721918106079, "rewards_train/2-w": 1.2742764949798584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.055063605308533, "rewards_train/margins_1": 0.46693384647369385, "rewards_train/margins_2": 0.0654454231262207, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -199.14620971679688, "logps_train/policy_1_l": -135.2548828125, "logps_train/policy_1_w": -126.18280029296875, "logps_train/policy_2_2": -173.92721557617188, "logps_train/policy_2_w": -149.80886840820312, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.301005244255066, "rewards_train/1-l": -1.019628882408142, "rewards_train/1-w": 0.942657470703125, "rewards_train/2-2": 1.764358401298523, "rewards_train/2-w": 0.5988012552261353, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.962286353111267, "rewards_train/margins_1": -0.3583477735519409, "rewards_train/margins_2": 1.1655571460723877, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -125.87387084960938, "logps_train/policy_1_l": -131.95362854003906, "logps_train/policy_1_w": -108.42813110351562, "logps_train/policy_2_2": -107.44324493408203, "logps_train/policy_2_w": -134.48919677734375, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.7094874382019043, "rewards_train/1-l": -1.3776869773864746, "rewards_train/1-w": 1.1353119611740112, "rewards_train/2-2": 0.8345821499824524, "rewards_train/2-w": 0.5495176315307617, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.512998938560486, "rewards_train/margins_1": 0.42582452297210693, "rewards_train/margins_2": 0.2850645184516907, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -104.85882568359375, "logps_train/policy_1_l": -133.44375610351562, "logps_train/policy_1_w": -109.13714599609375, "logps_train/policy_2_2": -91.37409973144531, "logps_train/policy_2_w": -131.6439208984375, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 0.7180237174034119, "rewards_train/1-l": -1.132266640663147, "rewards_train/1-w": 0.899175226688385, "rewards_train/2-2": 0.607121467590332, "rewards_train/2-w": 0.5504504442214966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.031441867351532, "rewards_train/margins_1": 0.18115150928497314, "rewards_train/margins_2": 0.05667102336883545, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -90.33224487304688, "logps_train/policy_1_l": -152.07749938964844, "logps_train/policy_1_w": -58.20826721191406, "logps_train/policy_2_2": -79.92436218261719, "logps_train/policy_2_w": -69.85009002685547, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -63.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 0.3370877504348755, "rewards_train/1-l": -2.036850929260254, "rewards_train/1-w": 0.5381577014923096, "rewards_train/2-2": 0.5097118020057678, "rewards_train/2-w": 0.3716316819190979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.5750086307525635, "rewards_train/margins_1": 0.20106995105743408, "rewards_train/margins_2": 0.13808012008666992, "step": 28 }, { "epoch": 0.09, "logps_train/policy_1_2": -80.53732299804688, "logps_train/policy_1_l": -86.17782592773438, "logps_train/policy_1_w": -69.61662292480469, "logps_train/policy_2_2": -73.17041015625, "logps_train/policy_2_w": -83.88893127441406, "logps_train/ref_1_2": -88.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -86.5, "rewards_train/1-2": 0.7598910331726074, "rewards_train/1-l": -0.817245364189148, "rewards_train/1-w": 0.4769603908061981, "rewards_train/2-2": 0.6528077125549316, "rewards_train/2-w": 0.2653302550315857, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.294205754995346, "rewards_train/margins_1": -0.2829306423664093, "rewards_train/margins_2": 0.38747745752334595, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -219.87973022460938, "logps_train/policy_1_l": -188.41993713378906, "logps_train/policy_1_w": -157.47190856933594, "logps_train/policy_2_2": -204.57656860351562, "logps_train/policy_2_w": -190.5449676513672, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.496402621269226, "rewards_train/1-l": -1.1416033506393433, "rewards_train/1-w": 1.1903090476989746, "rewards_train/2-2": 1.492342233657837, "rewards_train/2-w": 0.5548781156539917, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.331912398338318, "rewards_train/margins_1": -0.30609357357025146, "rewards_train/margins_2": 0.9374641180038452, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -148.7061004638672, "logps_train/policy_1_l": -123.49983215332031, "logps_train/policy_1_w": -134.9868927001953, "logps_train/policy_2_2": -123.3387451171875, "logps_train/policy_2_w": -157.49781799316406, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.3981399536132812, "rewards_train/1-l": -1.8257651329040527, "rewards_train/1-w": 1.9091238975524902, "rewards_train/2-2": 1.730968713760376, "rewards_train/2-w": 1.6095936298370361, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.734889030456543, "rewards_train/margins_1": 0.510983943939209, "rewards_train/margins_2": 0.12137508392333984, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -196.71566772460938, "logps_train/policy_1_l": -150.25494384765625, "logps_train/policy_1_w": -156.26698303222656, "logps_train/policy_2_2": -162.0457763671875, "logps_train/policy_2_w": -194.3654327392578, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7409327030181885, "rewards_train/1-l": -0.9499078989028931, "rewards_train/1-w": 2.0873641967773438, "rewards_train/2-2": 2.02667236328125, "rewards_train/2-w": 1.6134555339813232, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.037272095680237, "rewards_train/margins_1": 0.3464314937591553, "rewards_train/margins_2": 0.41321682929992676, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -142.8179931640625, "logps_train/policy_1_l": -134.96334838867188, "logps_train/policy_1_w": -111.97538757324219, "logps_train/policy_2_2": -119.36358642578125, "logps_train/policy_2_w": -128.28387451171875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.2494512796401978, "rewards_train/1-l": -1.6064913272857666, "rewards_train/1-w": 0.8602742552757263, "rewards_train/2-2": 1.0948915481567383, "rewards_train/2-w": 0.4591137766838074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.466765582561493, "rewards_train/margins_1": -0.38917702436447144, "rewards_train/margins_2": 0.6357777714729309, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -166.341796875, "logps_train/policy_1_l": -193.20001220703125, "logps_train/policy_1_w": -142.05966186523438, "logps_train/policy_2_2": -146.63525390625, "logps_train/policy_2_w": -160.997314453125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.46972566843032837, "rewards_train/1-l": -1.914923906326294, "rewards_train/1-w": 0.9940328598022461, "rewards_train/2-2": 0.8341313600540161, "rewards_train/2-w": 0.7690194249153137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.90895676612854, "rewards_train/margins_1": 0.5243071913719177, "rewards_train/margins_2": 0.06511193513870239, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -206.0703582763672, "logps_train/policy_1_l": -197.77102661132812, "logps_train/policy_1_w": -148.74957275390625, "logps_train/policy_2_2": -173.84072875976562, "logps_train/policy_2_w": -172.8779296875, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.9117149710655212, "rewards_train/1-l": -1.7388200759887695, "rewards_train/1-w": 1.3953555822372437, "rewards_train/2-2": 1.5042089223861694, "rewards_train/2-w": 1.5090808868408203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.134175658226013, "rewards_train/margins_1": 0.4836406111717224, "rewards_train/margins_2": -0.004871964454650879, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -176.469482421875, "logps_train/policy_1_l": -158.6962127685547, "logps_train/policy_1_w": -172.88211059570312, "logps_train/policy_2_2": -152.24237060546875, "logps_train/policy_2_w": -203.94976806640625, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 1.2585200071334839, "rewards_train/1-l": -1.4485276937484741, "rewards_train/1-w": 1.9656956195831299, "rewards_train/2-2": 1.5726385116577148, "rewards_train/2-w": 1.6886177062988281, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.414223313331604, "rewards_train/margins_1": 0.707175612449646, "rewards_train/margins_2": -0.11597919464111328, "step": 29 }, { "epoch": 0.09, "learning_rate": 4.411764705882353e-06, "loss": 1.3116, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -71.36326599121094, "logps_train/policy_1_l": -76.8591079711914, "logps_train/policy_1_w": -76.98273468017578, "logps_train/policy_2_2": -50.93067169189453, "logps_train/policy_2_w": -99.5511474609375, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -60.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.687111496925354, "rewards_train/1-l": -0.43522706627845764, "rewards_train/1-w": 1.2925469875335693, "rewards_train/2-2": 0.9555654525756836, "rewards_train/2-w": 0.9453733563423157, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.727774053812027, "rewards_train/margins_1": 0.6054354906082153, "rewards_train/margins_2": 0.01019209623336792, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -176.61233520507812, "logps_train/policy_1_l": -138.33424377441406, "logps_train/policy_1_w": -142.63308715820312, "logps_train/policy_2_2": -155.68771362304688, "logps_train/policy_2_w": -175.51065063476562, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.2247039079666138, "rewards_train/1-l": -1.395533800125122, "rewards_train/1-w": 1.9616913795471191, "rewards_train/2-2": 1.6015405654907227, "rewards_train/2-w": 1.5864344835281372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.357225179672241, "rewards_train/margins_1": 0.7369874715805054, "rewards_train/margins_2": 0.01510608196258545, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -121.22767639160156, "logps_train/policy_1_l": -107.81379699707031, "logps_train/policy_1_w": -104.2697982788086, "logps_train/policy_2_2": -95.54484558105469, "logps_train/policy_2_w": -126.59129333496094, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.8661969900131226, "rewards_train/1-l": -0.542218804359436, "rewards_train/1-w": 0.8870828151702881, "rewards_train/2-2": 1.067488431930542, "rewards_train/2-w": 0.7893071174621582, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.4293016195297241, "rewards_train/margins_1": 0.020885825157165527, "rewards_train/margins_2": 0.2781813144683838, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -129.2935028076172, "logps_train/policy_1_l": -182.39295959472656, "logps_train/policy_1_w": -115.1502685546875, "logps_train/policy_2_2": -107.77181243896484, "logps_train/policy_2_w": -152.64199829101562, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.1081500053405762, "rewards_train/1-l": -2.561464786529541, "rewards_train/1-w": 1.2302849292755127, "rewards_train/2-2": 1.4560469388961792, "rewards_train/2-w": 0.633064866065979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7917497158050537, "rewards_train/margins_1": 0.12213492393493652, "rewards_train/margins_2": 0.8229820728302002, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -157.732666015625, "logps_train/policy_1_l": -114.37236022949219, "logps_train/policy_1_w": -105.38418579101562, "logps_train/policy_2_2": -128.38946533203125, "logps_train/policy_2_w": -135.09164428710938, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.8436275124549866, "rewards_train/1-l": -1.265751600265503, "rewards_train/1-w": 1.8830665349960327, "rewards_train/2-2": 1.6062684059143066, "rewards_train/2-w": 1.5904450416564941, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1488181352615356, "rewards_train/margins_1": 1.0394390225410461, "rewards_train/margins_2": 0.0158233642578125, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -101.03873443603516, "logps_train/policy_1_l": -107.13160705566406, "logps_train/policy_1_w": -75.81661987304688, "logps_train/policy_2_2": -85.80316925048828, "logps_train/policy_2_w": -87.61444091796875, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": 0.6664389371871948, "rewards_train/1-l": -1.2061290740966797, "rewards_train/1-w": 0.7795680165290833, "rewards_train/2-2": 0.811089277267456, "rewards_train/2-w": 0.6881659030914307, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.985697090625763, "rewards_train/margins_1": 0.11312907934188843, "rewards_train/margins_2": 0.12292337417602539, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -181.96133422851562, "logps_train/policy_1_l": -74.45661926269531, "logps_train/policy_1_w": -120.41925048828125, "logps_train/policy_2_2": -146.37399291992188, "logps_train/policy_2_w": -149.44313049316406, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -66.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.46480393409729, "rewards_train/1-l": -0.8241779804229736, "rewards_train/1-w": 1.1612000465393066, "rewards_train/2-2": 1.8024444580078125, "rewards_train/2-w": 1.1181867122650146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9853780269622803, "rewards_train/margins_1": -0.3036038875579834, "rewards_train/margins_2": 0.6842577457427979, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -108.69962310791016, "logps_train/policy_1_l": -156.75927734375, "logps_train/policy_1_w": -99.03584289550781, "logps_train/policy_2_2": -94.07731628417969, "logps_train/policy_2_w": -121.83047485351562, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.887850284576416, "rewards_train/1-l": -1.3485832214355469, "rewards_train/1-w": 0.9081348180770874, "rewards_train/2-2": 0.803206205368042, "rewards_train/2-w": 0.6052337884902954, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2567180395126343, "rewards_train/margins_1": 0.020284533500671387, "rewards_train/margins_2": 0.19797241687774658, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -176.4761505126953, "logps_train/policy_1_l": -214.00765991210938, "logps_train/policy_1_w": -164.56781005859375, "logps_train/policy_2_2": -150.76010131835938, "logps_train/policy_2_w": -191.60891723632812, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.1898852586746216, "rewards_train/1-l": -2.1390483379364014, "rewards_train/1-w": 1.768218755722046, "rewards_train/2-2": 1.6583640575408936, "rewards_train/2-w": 1.8016088008880615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.9072670936584473, "rewards_train/margins_1": 0.5783334970474243, "rewards_train/margins_2": -0.14324474334716797, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -231.16712951660156, "logps_train/policy_1_l": -221.0348663330078, "logps_train/policy_1_w": -191.26739501953125, "logps_train/policy_2_2": -197.00277709960938, "logps_train/policy_2_w": -234.16329956054688, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.5989117622375488, "rewards_train/1-l": -1.394892692565918, "rewards_train/1-w": 1.8435728549957275, "rewards_train/2-2": 1.9215971231460571, "rewards_train/2-w": 1.214920163154602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2384655475616455, "rewards_train/margins_1": 0.2446610927581787, "rewards_train/margins_2": 0.7066769599914551, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -228.8433074951172, "logps_train/policy_1_l": -160.88848876953125, "logps_train/policy_1_w": -130.71371459960938, "logps_train/policy_2_2": -184.52207946777344, "logps_train/policy_2_w": -169.49501037597656, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.1312949657440186, "rewards_train/1-l": -2.2154104709625244, "rewards_train/1-w": 2.081752300262451, "rewards_train/2-2": 2.016542911529541, "rewards_train/2-w": 1.6317493915557861, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.297162771224976, "rewards_train/margins_1": 0.9504573345184326, "rewards_train/margins_2": 0.3847935199737549, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -101.69236755371094, "logps_train/policy_1_l": -126.5907211303711, "logps_train/policy_1_w": -110.27157592773438, "logps_train/policy_2_2": -84.74411010742188, "logps_train/policy_2_w": -129.5880584716797, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 0.987013578414917, "rewards_train/1-l": -1.3348537683486938, "rewards_train/1-w": 1.2900301218032837, "rewards_train/2-2": 0.9486361742019653, "rewards_train/2-w": 0.9849447011947632, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.6248838901519775, "rewards_train/margins_1": 0.3030165433883667, "rewards_train/margins_2": -0.03630852699279785, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -179.03277587890625, "logps_train/policy_1_l": -157.09754943847656, "logps_train/policy_1_w": -86.82167053222656, "logps_train/policy_2_2": -138.00515747070312, "logps_train/policy_2_w": -104.131103515625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.5455496311187744, "rewards_train/1-l": -2.223817825317383, "rewards_train/1-w": 0.49283313751220703, "rewards_train/2-2": 1.6322956085205078, "rewards_train/2-w": 0.3142332434654236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.71665096282959, "rewards_train/margins_1": -1.0527164936065674, "rewards_train/margins_2": 1.3180623650550842, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -194.03831481933594, "logps_train/policy_1_l": -226.48924255371094, "logps_train/policy_1_w": -173.1212158203125, "logps_train/policy_2_2": -173.63088989257812, "logps_train/policy_2_w": -206.81759643554688, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.697730302810669, "rewards_train/1-l": -2.8971667289733887, "rewards_train/1-w": 1.3620967864990234, "rewards_train/2-2": 1.7322242259979248, "rewards_train/2-w": 1.1651160717010498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.259263515472412, "rewards_train/margins_1": -0.3356335163116455, "rewards_train/margins_2": 0.567108154296875, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -36.27693176269531, "logps_train/policy_1_l": -53.396240234375, "logps_train/policy_1_w": -71.97245025634766, "logps_train/policy_2_2": -28.941011428833008, "logps_train/policy_2_w": -83.68679809570312, "logps_train/ref_1_2": -38.5, "logps_train/ref_1_l": -43.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -31.625, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.241838276386261, "rewards_train/1-l": -0.9919679164886475, "rewards_train/1-w": 0.7996302843093872, "rewards_train/2-2": 0.27230513095855713, "rewards_train/2-w": 0.8071016073226929, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.7915982007980347, "rewards_train/margins_1": 0.5577920079231262, "rewards_train/margins_2": -0.5347964763641357, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -171.78561401367188, "logps_train/policy_1_l": -166.60009765625, "logps_train/policy_1_w": -131.26055908203125, "logps_train/policy_2_2": -146.78231811523438, "logps_train/policy_2_w": -162.17108154296875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.389211654663086, "rewards_train/1-l": -1.5223151445388794, "rewards_train/1-w": 1.7989448308944702, "rewards_train/2-2": 1.5673738718032837, "rewards_train/2-w": 1.2172662019729614, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3212599754333496, "rewards_train/margins_1": 0.4097331762313843, "rewards_train/margins_2": 0.35010766983032227, "step": 31 }, { "epoch": 0.1, "learning_rate": 4.705882352941177e-06, "loss": 1.0964, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -216.3253173828125, "logps_train/policy_1_l": -266.0518798828125, "logps_train/policy_1_w": -186.51870727539062, "logps_train/policy_2_2": -176.2665252685547, "logps_train/policy_2_w": -232.7656707763672, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.033093214035034, "rewards_train/1-l": -2.8898582458496094, "rewards_train/1-w": 1.9168800115585327, "rewards_train/2-2": 2.2264721393585205, "rewards_train/2-w": 1.3890573978424072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.806738257408142, "rewards_train/margins_1": -0.11621320247650146, "rewards_train/margins_2": 0.8374147415161133, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -81.7076187133789, "logps_train/policy_1_l": -55.86343002319336, "logps_train/policy_1_w": -100.90679168701172, "logps_train/policy_2_2": -63.608707427978516, "logps_train/policy_2_w": -131.7489471435547, "logps_train/ref_1_2": -84.5, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.2776753902435303, "rewards_train/1-l": -0.5270411968231201, "rewards_train/1-w": 0.825922429561615, "rewards_train/2-2": 0.3360042870044708, "rewards_train/2-w": 0.6055740118026733, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.352963626384735, "rewards_train/margins_1": 0.5482470393180847, "rewards_train/margins_2": -0.2695697247982025, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -126.2665023803711, "logps_train/policy_1_l": -111.0805435180664, "logps_train/policy_1_w": -97.1767349243164, "logps_train/policy_2_2": -109.43357849121094, "logps_train/policy_2_w": -112.32194519042969, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.2249126434326172, "rewards_train/1-l": -1.1096172332763672, "rewards_train/1-w": 0.6823266744613647, "rewards_train/2-2": 1.470704436302185, "rewards_train/2-w": 0.44124311208724976, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.791943907737732, "rewards_train/margins_1": -0.5425859689712524, "rewards_train/margins_2": 1.0294613242149353, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -118.54352569580078, "logps_train/policy_1_l": -131.65823364257812, "logps_train/policy_1_w": -113.8382568359375, "logps_train/policy_2_2": -101.83907318115234, "logps_train/policy_2_w": -135.96725463867188, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.43002229928970337, "rewards_train/1-l": -1.253908634185791, "rewards_train/1-w": 0.5442992448806763, "rewards_train/2-2": 0.6067178845405579, "rewards_train/2-w": 0.18374428153038025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7982078790664673, "rewards_train/margins_1": 0.1142769455909729, "rewards_train/margins_2": 0.4229736030101776, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -211.3927764892578, "logps_train/policy_1_l": -237.5184326171875, "logps_train/policy_1_w": -197.8778076171875, "logps_train/policy_2_2": -173.95941162109375, "logps_train/policy_2_w": -250.00778198242188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.0138473510742188, "rewards_train/1-l": -2.3541858196258545, "rewards_train/1-w": 2.26534366607666, "rewards_train/2-2": 2.5571842193603516, "rewards_train/2-w": 1.6570329666137695, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.619529485702515, "rewards_train/margins_1": 0.2514963150024414, "rewards_train/margins_2": 0.900151252746582, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -168.16134643554688, "logps_train/policy_1_l": -120.83909606933594, "logps_train/policy_1_w": -144.09129333496094, "logps_train/policy_2_2": -153.86837768554688, "logps_train/policy_2_w": -160.60025024414062, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.0637485980987549, "rewards_train/1-l": -1.100316047668457, "rewards_train/1-w": 0.20571424067020416, "rewards_train/2-2": 1.271072506904602, "rewards_train/2-w": 0.027475804090499878, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3060302883386612, "rewards_train/margins_1": -0.8580343574285507, "rewards_train/margins_2": 1.2435967028141022, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -155.15972900390625, "logps_train/policy_1_l": -179.6048583984375, "logps_train/policy_1_w": -146.92507934570312, "logps_train/policy_2_2": -140.56817626953125, "logps_train/policy_2_w": -167.04833984375, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5559015274047852, "rewards_train/1-l": -2.1013054847717285, "rewards_train/1-w": 1.422335147857666, "rewards_train/2-2": 1.515838384628296, "rewards_train/2-w": 1.146728515625, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5236406326293945, "rewards_train/margins_1": -0.13356637954711914, "rewards_train/margins_2": 0.3691098690032959, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -157.575927734375, "logps_train/policy_1_l": -137.8429718017578, "logps_train/policy_1_w": -126.75273895263672, "logps_train/policy_2_2": -130.862060546875, "logps_train/policy_2_w": -162.8986358642578, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.2830324172973633, "rewards_train/1-l": -1.4061720371246338, "rewards_train/1-w": 1.7575383186340332, "rewards_train/2-2": 1.343482494354248, "rewards_train/2-w": 1.3335734605789185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.163710355758667, "rewards_train/margins_1": 0.4745059013366699, "rewards_train/margins_2": 0.00990903377532959, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -119.3487548828125, "logps_train/policy_1_l": -139.65728759765625, "logps_train/policy_1_w": -97.03947448730469, "logps_train/policy_2_2": -97.54962158203125, "logps_train/policy_2_w": -119.33551025390625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.3909063339233398, "rewards_train/1-l": -1.58701753616333, "rewards_train/1-w": 1.5132396221160889, "rewards_train/2-2": 1.570038080215454, "rewards_train/2-w": 0.976604700088501, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.100257158279419, "rewards_train/margins_1": 0.12233328819274902, "rewards_train/margins_2": 0.5934333801269531, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -87.0843505859375, "logps_train/policy_1_l": -127.72029113769531, "logps_train/policy_1_w": -105.79652404785156, "logps_train/policy_2_2": -66.57708740234375, "logps_train/policy_2_w": -125.34532928466797, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.5634400844573975, "rewards_train/1-l": -1.155623435974121, "rewards_train/1-w": 0.5648790597915649, "rewards_train/2-2": 0.675104022026062, "rewards_train/2-w": 0.4342174232006073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.720502495765686, "rewards_train/margins_1": 0.0014389753341674805, "rewards_train/margins_2": 0.2408865988254547, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -160.22659301757812, "logps_train/policy_1_l": -228.1929473876953, "logps_train/policy_1_w": -182.7032012939453, "logps_train/policy_2_2": -139.75677490234375, "logps_train/policy_2_w": -206.52174377441406, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.2617161273956299, "rewards_train/1-l": -1.876715898513794, "rewards_train/1-w": 1.2718679904937744, "rewards_train/2-2": 1.3969790935516357, "rewards_train/2-w": 0.8462642431259155, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1485838890075684, "rewards_train/margins_1": 0.010151863098144531, "rewards_train/margins_2": 0.5507148504257202, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -87.77039337158203, "logps_train/policy_1_l": -105.18080139160156, "logps_train/policy_1_w": -94.98175811767578, "logps_train/policy_2_2": -73.86395263671875, "logps_train/policy_2_w": -109.92611694335938, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -0.18719546496868134, "rewards_train/1-l": -0.06808014214038849, "rewards_train/1-w": 0.929167628288269, "rewards_train/2-2": 0.03743262588977814, "rewards_train/2-w": 0.5308260917663574, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.9972477704286575, "rewards_train/margins_1": 1.1163630932569504, "rewards_train/margins_2": -0.4933934658765793, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -142.79312133789062, "logps_train/policy_1_l": -72.81449890136719, "logps_train/policy_1_w": -95.07416534423828, "logps_train/policy_2_2": -125.01122283935547, "logps_train/policy_2_w": -110.87568664550781, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.650374174118042, "rewards_train/1-l": -0.5281291604042053, "rewards_train/1-w": 1.1140918731689453, "rewards_train/2-2": 1.7285654544830322, "rewards_train/2-w": 0.9351369142532349, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.6422210335731506, "rewards_train/margins_1": -0.5362823009490967, "rewards_train/margins_2": 0.7934285402297974, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -112.10408020019531, "logps_train/policy_1_l": -129.11497497558594, "logps_train/policy_1_w": -111.94183349609375, "logps_train/policy_2_2": -90.06573486328125, "logps_train/policy_2_w": -138.9652862548828, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.19506040215492249, "rewards_train/1-l": -0.759544312953949, "rewards_train/1-w": 1.567535638809204, "rewards_train/2-2": 0.5506524443626404, "rewards_train/2-w": 1.608157992362976, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.327079951763153, "rewards_train/margins_1": 1.3724752366542816, "rewards_train/margins_2": -1.0575055480003357, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -99.78922271728516, "logps_train/policy_1_l": -124.91059875488281, "logps_train/policy_1_w": -125.09993743896484, "logps_train/policy_2_2": -84.80976867675781, "logps_train/policy_2_w": -136.73214721679688, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.800765335559845, "rewards_train/1-l": -0.7074658870697021, "rewards_train/1-w": 1.7540688514709473, "rewards_train/2-2": 0.9283982515335083, "rewards_train/2-w": 1.784597396850586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4615347385406494, "rewards_train/margins_1": 0.9533035159111023, "rewards_train/margins_2": -0.8561991453170776, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -145.60733032226562, "logps_train/policy_1_l": -199.3702392578125, "logps_train/policy_1_w": -110.39176940917969, "logps_train/policy_2_2": -124.48201751708984, "logps_train/policy_2_w": -136.41221618652344, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.0193455219268799, "rewards_train/1-l": -1.6025989055633545, "rewards_train/1-w": 1.2473464012145996, "rewards_train/2-2": 1.1282870769500732, "rewards_train/2-w": 0.959560215473175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.849945306777954, "rewards_train/margins_1": 0.22800087928771973, "rewards_train/margins_2": 0.1687268614768982, "step": 33 }, { "epoch": 0.1, "learning_rate": 5e-06, "loss": 1.236, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -206.2513427734375, "logps_train/policy_1_l": -181.12744140625, "logps_train/policy_1_w": -178.76138305664062, "logps_train/policy_2_2": -173.34100341796875, "logps_train/policy_2_w": -205.08587646484375, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4647101163864136, "rewards_train/1-l": -1.4760258197784424, "rewards_train/1-w": 1.3285508155822754, "rewards_train/2-2": 1.8096485137939453, "rewards_train/2-w": 1.2601615190505981, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8045766353607178, "rewards_train/margins_1": -0.13615930080413818, "rewards_train/margins_2": 0.5494869947433472, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -173.90521240234375, "logps_train/policy_1_l": -145.4657745361328, "logps_train/policy_1_w": -168.44021606445312, "logps_train/policy_2_2": -154.22743225097656, "logps_train/policy_2_w": -199.2456512451172, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.5063540935516357, "rewards_train/1-l": -1.5082963705062866, "rewards_train/1-w": 2.290353775024414, "rewards_train/2-2": 1.6866322755813599, "rewards_train/2-w": 1.9348084926605225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7986501455307007, "rewards_train/margins_1": 0.7839996814727783, "rewards_train/margins_2": -0.2481762170791626, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -127.31504821777344, "logps_train/policy_1_l": -115.41683197021484, "logps_train/policy_1_w": -160.67286682128906, "logps_train/policy_2_2": -112.04479217529297, "logps_train/policy_2_w": -178.46365356445312, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.106287956237793, "rewards_train/1-l": -1.0489096641540527, "rewards_train/1-w": 1.8311512470245361, "rewards_train/2-2": 1.3550914525985718, "rewards_train/2-w": 1.5208220481872559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.880060911178589, "rewards_train/margins_1": 0.7248632907867432, "rewards_train/margins_2": -0.16573059558868408, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -110.16183471679688, "logps_train/policy_1_l": -147.7451171875, "logps_train/policy_1_w": -115.38233947753906, "logps_train/policy_2_2": -92.9717788696289, "logps_train/policy_2_w": -135.6752471923828, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.7293241024017334, "rewards_train/1-l": -0.8596674203872681, "rewards_train/1-w": 1.7632300853729248, "rewards_train/2-2": 1.1877830028533936, "rewards_train/2-w": 1.521733283996582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.622897505760193, "rewards_train/margins_1": 1.0339059829711914, "rewards_train/margins_2": -0.3339502811431885, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -134.25001525878906, "logps_train/policy_1_l": -62.66595458984375, "logps_train/policy_1_w": -78.8382339477539, "logps_train/policy_2_2": -100.16023254394531, "logps_train/policy_2_w": -108.4109115600586, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -59.75, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 0.6624982357025146, "rewards_train/1-l": -0.30097055435180664, "rewards_train/1-w": 0.7193014621734619, "rewards_train/2-2": 0.9308512806892395, "rewards_train/2-w": 0.43703365325927734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.0202720165252686, "rewards_train/margins_1": 0.056803226470947266, "rewards_train/margins_2": 0.49381762742996216, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -210.0677947998047, "logps_train/policy_1_l": -199.89596557617188, "logps_train/policy_1_w": -203.78338623046875, "logps_train/policy_2_2": -174.82861328125, "logps_train/policy_2_w": -242.9302978515625, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.265096426010132, "rewards_train/1-l": -1.4346168041229248, "rewards_train/1-w": 2.110722541809082, "rewards_train/2-2": 2.42026424407959, "rewards_train/2-w": 1.3429076671600342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.545339345932007, "rewards_train/margins_1": -0.1543738842010498, "rewards_train/margins_2": 1.0773565769195557, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -58.86595153808594, "logps_train/policy_1_l": -31.095703125, "logps_train/policy_1_w": -57.911964416503906, "logps_train/policy_2_2": -49.209590911865234, "logps_train/policy_2_w": -72.65937042236328, "logps_train/ref_1_2": -66.0, "logps_train/ref_1_l": -26.875, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -58.0, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.7095966339111328, "rewards_train/1-l": -0.4188476502895355, "rewards_train/1-w": 0.9162259101867676, "rewards_train/2-2": 0.8864630460739136, "rewards_train/2-w": 0.5567187666893005, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.335073560476303, "rewards_train/margins_1": 0.20662927627563477, "rewards_train/margins_2": 0.32974427938461304, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -210.01821899414062, "logps_train/policy_1_l": -292.7999267578125, "logps_train/policy_1_w": -275.4430847167969, "logps_train/policy_2_2": -182.51092529296875, "logps_train/policy_2_w": -322.6082458496094, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -306.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -344.0, "rewards_train/1-2": 2.260678768157959, "rewards_train/1-l": -2.2799930572509766, "rewards_train/1-w": 3.1431925296783447, "rewards_train/2-2": 2.5176565647125244, "rewards_train/2-w": 2.245424747467041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.423185586929321, "rewards_train/margins_1": 0.8825137615203857, "rewards_train/margins_2": 0.2722318172454834, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -215.9989013671875, "logps_train/policy_1_l": -220.08087158203125, "logps_train/policy_1_w": -170.96054077148438, "logps_train/policy_2_2": -190.9879150390625, "logps_train/policy_2_w": -202.2618408203125, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.2766740322113037, "rewards_train/1-l": -1.2343568801879883, "rewards_train/1-w": 1.8664467334747314, "rewards_train/2-2": 2.379333257675171, "rewards_train/2-w": 1.5238165855407715, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1008036136627197, "rewards_train/margins_1": -0.41022729873657227, "rewards_train/margins_2": 0.8555166721343994, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -26.233243942260742, "logps_train/policy_1_l": -28.24112319946289, "logps_train/policy_1_w": -33.23173141479492, "logps_train/policy_2_2": -21.169361114501953, "logps_train/policy_2_w": -35.24581527709961, "logps_train/ref_1_2": -29.0, "logps_train/ref_1_l": -26.75, "logps_train/ref_1_w": -34.25, "logps_train/ref_2_2": -24.625, "logps_train/ref_2_w": -35.5, "rewards_train/1-2": 0.27042561769485474, "rewards_train/1-l": -0.1475984752178192, "rewards_train/1-w": 0.09245174378156662, "rewards_train/2-2": 0.3457592725753784, "rewards_train/2-w": 0.03459826111793518, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.24005021899938583, "rewards_train/margins_1": -0.17797387391328812, "rewards_train/margins_2": 0.31116101145744324, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -215.8778839111328, "logps_train/policy_1_l": -217.93209838867188, "logps_train/policy_1_w": -149.3421630859375, "logps_train/policy_2_2": -183.9075164794922, "logps_train/policy_2_w": -183.88284301757812, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.6958057880401611, "rewards_train/1-l": -1.13227117061615, "rewards_train/1-w": 1.8532837629318237, "rewards_train/2-2": 1.7592475414276123, "rewards_train/2-w": 1.660151720046997, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9855549335479736, "rewards_train/margins_1": 0.1574779748916626, "rewards_train/margins_2": 0.09909582138061523, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -167.1512451171875, "logps_train/policy_1_l": -148.64288330078125, "logps_train/policy_1_w": -135.98654174804688, "logps_train/policy_2_2": -137.31387329101562, "logps_train/policy_2_w": -171.17694091796875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0973764657974243, "rewards_train/1-l": -0.897490918636322, "rewards_train/1-w": 1.5419707298278809, "rewards_train/2-2": 1.3186132907867432, "rewards_train/2-w": 1.0916796922683716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.439461648464203, "rewards_train/margins_1": 0.44459426403045654, "rewards_train/margins_2": 0.22693359851837158, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -133.97134399414062, "logps_train/policy_1_l": -178.48294067382812, "logps_train/policy_1_w": -154.73101806640625, "logps_train/policy_2_2": -113.33660888671875, "logps_train/policy_2_w": -189.96145629882812, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.4075536727905273, "rewards_train/1-l": -1.5238804817199707, "rewards_train/1-w": 2.1433048248291016, "rewards_train/2-2": 1.911651372909546, "rewards_train/2-w": 1.6210408210754395, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6671853065490723, "rewards_train/margins_1": 0.7357511520385742, "rewards_train/margins_2": 0.29061055183410645, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -159.33743286132812, "logps_train/policy_1_l": -189.04965209960938, "logps_train/policy_1_w": -151.8009796142578, "logps_train/policy_2_2": -120.6468734741211, "logps_train/policy_2_w": -206.76705932617188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.4396955966949463, "rewards_train/1-l": -1.8432461023330688, "rewards_train/1-w": 2.2527146339416504, "rewards_train/2-2": 1.9579696655273438, "rewards_train/2-w": 1.8580596446990967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.095960736274719, "rewards_train/margins_1": 0.8130190372467041, "rewards_train/margins_2": 0.09991002082824707, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -81.27853393554688, "logps_train/policy_1_l": -116.94893646240234, "logps_train/policy_1_w": -70.82093048095703, "logps_train/policy_2_2": -66.63426208496094, "logps_train/policy_2_w": -83.86001586914062, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -91.0, "rewards_train/1-2": 0.6033971309661865, "rewards_train/1-l": -0.6862994432449341, "rewards_train/1-w": 0.9582880735397339, "rewards_train/2-2": 0.8068857192993164, "rewards_train/2-w": 0.709311306476593, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.644587516784668, "rewards_train/margins_1": 0.35489094257354736, "rewards_train/margins_2": 0.09757441282272339, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -134.8917236328125, "logps_train/policy_1_l": -157.12269592285156, "logps_train/policy_1_w": -101.64161682128906, "logps_train/policy_2_2": -116.28840637207031, "logps_train/policy_2_w": -116.33463287353516, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.1623897552490234, "rewards_train/1-l": -0.8751611113548279, "rewards_train/1-w": 1.2092753648757935, "rewards_train/2-2": 1.0406907796859741, "rewards_train/2-w": 0.9555989503860474, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.0844364762306213, "rewards_train/margins_1": 0.04688560962677002, "rewards_train/margins_2": 0.08509182929992676, "step": 35 }, { "epoch": 0.11, "learning_rate": 4.999878002203615e-06, "loss": 1.1169, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -140.87132263183594, "logps_train/policy_1_l": -136.3379364013672, "logps_train/policy_1_w": -158.49374389648438, "logps_train/policy_2_2": -118.95376586914062, "logps_train/policy_2_w": -188.87135314941406, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.3284932374954224, "rewards_train/1-l": -1.232621192932129, "rewards_train/1-w": 2.095937728881836, "rewards_train/2-2": 1.809310793876648, "rewards_train/2-w": 1.6816149950027466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.328558921813965, "rewards_train/margins_1": 0.7674444913864136, "rewards_train/margins_2": 0.12769579887390137, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -181.6966552734375, "logps_train/policy_1_l": -148.5622100830078, "logps_train/policy_1_w": -205.92532348632812, "logps_train/policy_2_2": -150.1646728515625, "logps_train/policy_2_w": -235.41497802734375, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 1.8631476163864136, "rewards_train/1-l": -0.2585647702217102, "rewards_train/1-w": 2.105905055999756, "rewards_train/2-2": 2.3179092407226562, "rewards_train/2-w": 1.9553778171539307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.364469826221466, "rewards_train/margins_1": 0.24275743961334229, "rewards_train/margins_2": 0.3625314235687256, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -168.4813995361328, "logps_train/policy_1_l": -163.63717651367188, "logps_train/policy_1_w": -169.84140014648438, "logps_train/policy_2_2": -150.11996459960938, "logps_train/policy_2_w": -192.7930145263672, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.7701225280761719, "rewards_train/1-l": -1.2594212293624878, "rewards_train/1-w": 2.220059871673584, "rewards_train/2-2": 2.181753635406494, "rewards_train/2-w": 2.048823356628418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.4794811010360718, "rewards_train/margins_1": 0.4499373435974121, "rewards_train/margins_2": 0.13293027877807617, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -108.62063598632812, "logps_train/policy_1_l": -127.14269256591797, "logps_train/policy_1_w": -99.58663940429688, "logps_train/policy_2_2": -95.00304412841797, "logps_train/policy_2_w": -118.63565063476562, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.4566869735717773, "rewards_train/1-l": -0.7976673245429993, "rewards_train/1-w": 2.063211441040039, "rewards_train/2-2": 1.6012579202651978, "rewards_train/2-w": 1.9184658527374268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8608787655830383, "rewards_train/margins_1": 0.6065244674682617, "rewards_train/margins_2": -0.317207932472229, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -94.95486450195312, "logps_train/policy_1_l": -156.1600341796875, "logps_train/policy_1_w": -103.53971862792969, "logps_train/policy_2_2": -85.69795227050781, "logps_train/policy_2_w": -128.74375915527344, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.6490452289581299, "rewards_train/1-l": -1.1600943803787231, "rewards_train/1-w": 1.0257160663604736, "rewards_train/2-2": 0.5200488567352295, "rewards_train/2-w": 0.71117103099823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.1858104467391968, "rewards_train/margins_1": 0.37667083740234375, "rewards_train/margins_2": -0.1911221742630005, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -214.69671630859375, "logps_train/policy_1_l": -201.3303680419922, "logps_train/policy_1_w": -150.15924072265625, "logps_train/policy_2_2": -191.28204345703125, "logps_train/policy_2_w": -176.086181640625, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.0303287506103516, "rewards_train/1-l": -1.3717091083526611, "rewards_train/1-w": 2.068450927734375, "rewards_train/2-2": 2.399921417236328, "rewards_train/2-w": 1.6695069074630737, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.440160036087036, "rewards_train/margins_1": 0.03812217712402344, "rewards_train/margins_2": 0.7304145097732544, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -197.461181640625, "logps_train/policy_1_l": -135.61456298828125, "logps_train/policy_1_w": -165.2714385986328, "logps_train/policy_2_2": -155.77899169921875, "logps_train/policy_2_w": -209.1654052734375, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.373412847518921, "rewards_train/1-l": -1.1373361349105835, "rewards_train/1-w": 1.902543544769287, "rewards_train/2-2": 2.2424144744873047, "rewards_train/2-w": 1.6365842819213867, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0398796796798706, "rewards_train/margins_1": 0.5291306972503662, "rewards_train/margins_2": 0.605830192565918, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -165.42916870117188, "logps_train/policy_1_l": -141.63198852539062, "logps_train/policy_1_w": -159.22836303710938, "logps_train/policy_2_2": -131.30218505859375, "logps_train/policy_2_w": -203.21144104003906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.8555200099945068, "rewards_train/1-l": -0.6296057105064392, "rewards_train/1-w": 2.139665126800537, "rewards_train/2-2": 1.2385320663452148, "rewards_train/2-w": 1.338230013847351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7692708373069763, "rewards_train/margins_1": 1.2841451168060303, "rewards_train/margins_2": -0.09969794750213623, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -269.88848876953125, "logps_train/policy_1_l": -220.15386962890625, "logps_train/policy_1_w": -207.16207885742188, "logps_train/policy_2_2": -242.1167755126953, "logps_train/policy_2_w": -242.23419189453125, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -270.0, "logps_train/ref_2_w": -270.0, "rewards_train/1-2": 2.6674017906188965, "rewards_train/1-l": -1.0731992721557617, "rewards_train/1-w": 3.1275429725646973, "rewards_train/2-2": 2.8570733070373535, "rewards_train/2-w": 2.720329999923706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.200742244720459, "rewards_train/margins_1": 0.4601411819458008, "rewards_train/margins_2": 0.13674330711364746, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -166.7537841796875, "logps_train/policy_1_l": -166.36764526367188, "logps_train/policy_1_w": -170.42135620117188, "logps_train/policy_2_2": -138.7286376953125, "logps_train/policy_2_w": -204.29554748535156, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4027462005615234, "rewards_train/1-l": -1.2893043756484985, "rewards_train/1-w": 2.0404815673828125, "rewards_train/2-2": 1.7286975383758545, "rewards_train/2-w": 1.4529163837432861, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.329785943031311, "rewards_train/margins_1": 0.6377353668212891, "rewards_train/margins_2": 0.27578115463256836, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -169.5704345703125, "logps_train/policy_1_l": -152.42930603027344, "logps_train/policy_1_w": -99.54548645019531, "logps_train/policy_2_2": -147.22793579101562, "logps_train/policy_2_w": -118.09077453613281, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.208190679550171, "rewards_train/1-l": -0.6610940098762512, "rewards_train/1-w": 1.1966230869293213, "rewards_train/2-2": 1.4736905097961426, "rewards_train/2-w": 1.0018595457077026, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8577170968055725, "rewards_train/margins_1": -0.01156759262084961, "rewards_train/margins_2": 0.47183096408843994, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -112.2877197265625, "logps_train/policy_1_l": -134.24758911132812, "logps_train/policy_1_w": -148.0459442138672, "logps_train/policy_2_2": -91.62528228759766, "logps_train/policy_2_w": -165.35870361328125, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.8977905511856079, "rewards_train/1-l": -0.6882369518280029, "rewards_train/1-w": 1.75067937374115, "rewards_train/2-2": 1.1612999439239502, "rewards_train/2-w": 1.4248719215393066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.438916325569153, "rewards_train/margins_1": 0.852888822555542, "rewards_train/margins_2": -0.26357197761535645, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -161.61184692382812, "logps_train/policy_1_l": -154.89242553710938, "logps_train/policy_1_w": -134.438720703125, "logps_train/policy_2_2": -130.05426025390625, "logps_train/policy_2_w": -164.97119140625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.0544395446777344, "rewards_train/1-l": -0.5736182928085327, "rewards_train/1-w": 1.5740966796875, "rewards_train/2-2": 1.1851999759674072, "rewards_train/2-w": 1.0997560024261475, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1477149724960327, "rewards_train/margins_1": 0.5196571350097656, "rewards_train/margins_2": 0.08544397354125977, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -160.6177978515625, "logps_train/policy_1_l": -93.81890869140625, "logps_train/policy_1_w": -99.41685485839844, "logps_train/policy_2_2": -137.537353515625, "logps_train/policy_2_w": -118.82904815673828, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.7850959300994873, "rewards_train/1-l": -0.36157822608947754, "rewards_train/1-w": 1.437220573425293, "rewards_train/2-2": 2.1970462799072266, "rewards_train/2-w": 1.1874074935913086, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.7987987995147705, "rewards_train/margins_1": -0.34787535667419434, "rewards_train/margins_2": 1.009638786315918, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -124.575927734375, "logps_train/policy_1_l": -75.609619140625, "logps_train/policy_1_w": -68.81211853027344, "logps_train/policy_2_2": -110.00419616699219, "logps_train/policy_2_w": -82.33074951171875, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 1.2183842658996582, "rewards_train/1-l": -0.6078370213508606, "rewards_train/1-w": 0.45003795623779297, "rewards_train/2-2": 1.357588291168213, "rewards_train/2-w": 0.3622380495071411, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.0578749775886536, "rewards_train/margins_1": -0.7683463096618652, "rewards_train/margins_2": 0.9953502416610718, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -149.96331787109375, "logps_train/policy_1_l": -157.5269317626953, "logps_train/policy_1_w": -87.49180603027344, "logps_train/policy_2_2": -134.15789794921875, "logps_train/policy_2_w": -106.26910400390625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 2.1802315711975098, "rewards_train/1-l": -1.281501054763794, "rewards_train/1-w": 1.4153703451156616, "rewards_train/2-2": 2.4728808403015137, "rewards_train/2-w": 1.1226990222930908, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6968713998794556, "rewards_train/margins_1": -0.7648612260818481, "rewards_train/margins_2": 1.3501818180084229, "step": 37 }, { "epoch": 0.11, "learning_rate": 4.9995120207212275e-06, "loss": 0.9671, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -137.5615692138672, "logps_train/policy_1_l": -124.86776733398438, "logps_train/policy_1_w": -85.77369689941406, "logps_train/policy_2_2": -111.16494750976562, "logps_train/policy_2_w": -111.95088195800781, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.1547805070877075, "rewards_train/1-l": -0.727401852607727, "rewards_train/1-w": 1.4035868644714355, "rewards_train/2-2": 1.4772549867630005, "rewards_train/2-w": 1.0103809833526611, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1309887170791626, "rewards_train/margins_1": 0.24880635738372803, "rewards_train/margins_2": 0.46687400341033936, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -257.52435302734375, "logps_train/policy_1_l": -284.0781555175781, "logps_train/policy_1_w": -204.78387451171875, "logps_train/policy_2_2": -228.6600341796875, "logps_train/policy_2_w": -236.3159637451172, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -264.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.9788150787353516, "rewards_train/1-l": -2.3625028133392334, "rewards_train/1-w": 3.5747385025024414, "rewards_train/2-2": 3.47149658203125, "rewards_train/2-w": 3.0512170791625977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.937241315841675, "rewards_train/margins_1": 0.5959234237670898, "rewards_train/margins_2": 0.42027950286865234, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -141.2134552001953, "logps_train/policy_1_l": -122.8428955078125, "logps_train/policy_1_w": -120.0215072631836, "logps_train/policy_2_2": -121.09233093261719, "logps_train/policy_2_w": -139.87503051757812, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.0411540269851685, "rewards_train/1-l": -0.5185176134109497, "rewards_train/1-w": 1.5470677614212036, "rewards_train/2-2": 1.2837364673614502, "rewards_train/2-w": 1.3164036273956299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0655853748321533, "rewards_train/margins_1": 0.5059137344360352, "rewards_train/margins_2": -0.03266716003417969, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -173.97372436523438, "logps_train/policy_1_l": -128.0215301513672, "logps_train/policy_1_w": -115.23684692382812, "logps_train/policy_2_2": -147.31536865234375, "logps_train/policy_2_w": -138.7118377685547, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.858877420425415, "rewards_train/1-l": -0.532817006111145, "rewards_train/1-w": 1.699363350868225, "rewards_train/2-2": 2.2946348190307617, "rewards_train/2-w": 1.2163166999816895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.23218035697937, "rewards_train/margins_1": -0.15951406955718994, "rewards_train/margins_2": 1.0783181190490723, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -203.9390106201172, "logps_train/policy_1_l": -257.143798828125, "logps_train/policy_1_w": -186.85736083984375, "logps_train/policy_2_2": -166.88015747070312, "logps_train/policy_2_w": -222.65087890625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.1435980796813965, "rewards_train/1-l": -2.3264880180358887, "rewards_train/1-w": 2.5978586673736572, "rewards_train/2-2": 1.556124210357666, "rewards_train/2-w": 1.7786619663238525, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.924346685409546, "rewards_train/margins_1": 1.4542605876922607, "rewards_train/margins_2": -0.22253775596618652, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -129.003173828125, "logps_train/policy_1_l": -103.30290222167969, "logps_train/policy_1_w": -128.1964111328125, "logps_train/policy_2_2": -107.86436462402344, "logps_train/policy_2_w": -156.88035583496094, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.1340581178665161, "rewards_train/1-l": -0.2877117991447449, "rewards_train/1-w": 1.5330934524536133, "rewards_train/2-2": 1.4401261806488037, "rewards_train/2-w": 1.0686050653457642, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.8208052515983582, "rewards_train/margins_1": 0.39903533458709717, "rewards_train/margins_2": 0.37152111530303955, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -193.17324829101562, "logps_train/policy_1_l": -143.754638671875, "logps_train/policy_1_w": -134.67758178710938, "logps_train/policy_2_2": -165.10020446777344, "logps_train/policy_2_w": -160.08004760742188, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.45455002784729, "rewards_train/1-l": -0.10280640423297882, "rewards_train/1-w": 1.6224770545959473, "rewards_train/2-2": 2.2837305068969727, "rewards_train/2-w": 1.2341835498809814, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.725283458828926, "rewards_train/margins_1": 0.16792702674865723, "rewards_train/margins_2": 1.0495469570159912, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -123.60456085205078, "logps_train/policy_1_l": -111.53271484375, "logps_train/policy_1_w": -122.02210998535156, "logps_train/policy_2_2": -103.6818618774414, "logps_train/policy_2_w": -141.23243713378906, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.4496995210647583, "rewards_train/1-l": -0.9434083700180054, "rewards_train/1-w": 1.545152187347412, "rewards_train/2-2": 1.7689236402511597, "rewards_train/2-w": 1.1611311435699463, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4885605573654175, "rewards_train/margins_1": 0.09545266628265381, "rewards_train/margins_2": 0.6077924966812134, "step": 38 }, { "epoch": 0.12, "logps_train/policy_1_2": -73.61320495605469, "logps_train/policy_1_l": -96.32263946533203, "logps_train/policy_1_w": -121.45654296875, "logps_train/policy_2_2": -61.73468780517578, "logps_train/policy_2_w": -139.1619110107422, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.45196080207824707, "rewards_train/1-l": -0.9807010889053345, "rewards_train/1-w": 1.6105948686599731, "rewards_train/2-2": 0.7023124694824219, "rewards_train/2-w": 1.2619333267211914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.5912959575653076, "rewards_train/margins_1": 1.158634066581726, "rewards_train/margins_2": -0.5596208572387695, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -178.96087646484375, "logps_train/policy_1_l": -151.2998046875, "logps_train/policy_1_w": -132.64163208007812, "logps_train/policy_2_2": -146.79229736328125, "logps_train/policy_2_w": -167.0780029296875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5461006164550781, "rewards_train/1-l": -0.8367191553115845, "rewards_train/1-w": 1.317868709564209, "rewards_train/2-2": 1.8426457643508911, "rewards_train/2-w": 1.0851681232452393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1545878648757935, "rewards_train/margins_1": -0.22823190689086914, "rewards_train/margins_2": 0.7574776411056519, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -97.00848388671875, "logps_train/policy_1_l": -64.00328063964844, "logps_train/policy_1_w": -79.98008728027344, "logps_train/policy_2_2": -82.82809448242188, "logps_train/policy_2_w": -94.06511688232422, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.394463300704956, "rewards_train/1-l": -0.5376332998275757, "rewards_train/1-w": 1.3855845928192139, "rewards_train/2-2": 1.6328163146972656, "rewards_train/2-w": 1.2493476867675781, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9232178926467896, "rewards_train/margins_1": -0.008878707885742188, "rewards_train/margins_2": 0.3834686279296875, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -170.19573974609375, "logps_train/policy_1_l": -184.53314208984375, "logps_train/policy_1_w": -117.76780700683594, "logps_train/policy_2_2": -128.38819885253906, "logps_train/policy_2_w": -147.67910766601562, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0343334674835205, "rewards_train/1-l": -0.8888610601425171, "rewards_train/1-w": 1.1950945854187012, "rewards_train/2-2": 1.5900862216949463, "rewards_train/2-w": 0.8664650917053223, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0839556455612183, "rewards_train/margins_1": 0.16076111793518066, "rewards_train/margins_2": 0.723621129989624, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -180.9249267578125, "logps_train/policy_1_l": -144.3749542236328, "logps_train/policy_1_w": -144.4147186279297, "logps_train/policy_2_2": -154.89096069335938, "logps_train/policy_2_w": -170.32901000976562, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.877819299697876, "rewards_train/1-l": -0.6585893630981445, "rewards_train/1-w": 1.2741527557373047, "rewards_train/2-2": 1.0101227760314941, "rewards_train/2-w": 0.8327237963676453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9327421188354492, "rewards_train/margins_1": 0.3963334560394287, "rewards_train/margins_2": 0.17739897966384888, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -184.42442321777344, "logps_train/policy_1_l": -153.59017944335938, "logps_train/policy_1_w": -134.91458129882812, "logps_train/policy_2_2": -152.34255981445312, "logps_train/policy_2_w": -179.86045837402344, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.9747450351715088, "rewards_train/1-l": -1.5033533573150635, "rewards_train/1-w": 1.7210426330566406, "rewards_train/2-2": 2.3032429218292236, "rewards_train/2-w": 0.8327040076255798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.224395990371704, "rewards_train/margins_1": -0.25370240211486816, "rewards_train/margins_2": 1.4705389142036438, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -128.20721435546875, "logps_train/policy_1_l": -159.46197509765625, "logps_train/policy_1_w": -144.07655334472656, "logps_train/policy_2_2": -105.30873107910156, "logps_train/policy_2_w": -186.98171997070312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.183967113494873, "rewards_train/1-l": -1.0295960903167725, "rewards_train/1-w": 1.929063081741333, "rewards_train/2-2": 1.3495948314666748, "rewards_train/2-w": 1.2455780506134033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9586591720581055, "rewards_train/margins_1": 0.74509596824646, "rewards_train/margins_2": 0.10401678085327148, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -185.58033752441406, "logps_train/policy_1_l": -134.80776977539062, "logps_train/policy_1_w": -149.7860870361328, "logps_train/policy_2_2": -160.740966796875, "logps_train/policy_2_w": -169.04498291015625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.438450813293457, "rewards_train/1-l": -0.24737781286239624, "rewards_train/1-w": 1.5252972841262817, "rewards_train/2-2": 1.7896238565444946, "rewards_train/2-w": 1.423626184463501, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.772675096988678, "rewards_train/margins_1": 0.08684647083282471, "rewards_train/margins_2": 0.36599767208099365, "step": 39 }, { "epoch": 0.12, "learning_rate": 4.998902091271986e-06, "loss": 1.0819, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -98.175048828125, "logps_train/policy_1_l": -75.0255355834961, "logps_train/policy_1_w": -62.261932373046875, "logps_train/policy_2_2": -79.83563232421875, "logps_train/policy_2_w": -82.44593048095703, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.9266357421875, "rewards_train/1-l": -0.36544400453567505, "rewards_train/1-w": 1.1956820487976074, "rewards_train/2-2": 1.128936529159546, "rewards_train/2-w": 1.0421255826950073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.5611260533332825, "rewards_train/margins_1": 0.2690463066101074, "rewards_train/margins_2": 0.08681094646453857, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -103.87985229492188, "logps_train/policy_1_l": -171.65809631347656, "logps_train/policy_1_w": -151.7634735107422, "logps_train/policy_2_2": -83.98831176757812, "logps_train/policy_2_w": -189.6614227294922, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.5370150804519653, "rewards_train/1-l": -1.0658094882965088, "rewards_train/1-w": 1.9549036026000977, "rewards_train/2-2": 0.6660127639770508, "rewards_train/2-w": 1.20260751247406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.0207130908966064, "rewards_train/margins_1": 1.4178885221481323, "rewards_train/margins_2": -0.5365947484970093, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -150.61331176757812, "logps_train/policy_1_l": -127.6958999633789, "logps_train/policy_1_w": -114.55693054199219, "logps_train/policy_2_2": -120.78754425048828, "logps_train/policy_2_w": -135.85513305664062, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.2050745487213135, "rewards_train/1-l": -1.1749601364135742, "rewards_train/1-w": 1.3165721893310547, "rewards_train/2-2": 1.47202730178833, "rewards_train/2-w": 1.0812838077545166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.491532325744629, "rewards_train/margins_1": 0.11149764060974121, "rewards_train/margins_2": 0.3907434940338135, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -182.47775268554688, "logps_train/policy_1_l": -108.51315307617188, "logps_train/policy_1_w": -125.22484588623047, "logps_train/policy_2_2": -153.88262939453125, "logps_train/policy_2_w": -154.02490234375, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.795973539352417, "rewards_train/1-l": 0.08735628426074982, "rewards_train/1-w": 2.154078483581543, "rewards_train/2-2": 2.083611249923706, "rewards_train/2-w": 1.6318835020065308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.066722199320793, "rewards_train/margins_1": 0.358104944229126, "rewards_train/margins_2": 0.4517277479171753, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -154.62738037109375, "logps_train/policy_1_l": -143.7509765625, "logps_train/policy_1_w": -186.0923309326172, "logps_train/policy_2_2": -129.49264526367188, "logps_train/policy_2_w": -212.62437438964844, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.3161675930023193, "rewards_train/1-l": -0.9688474535942078, "rewards_train/1-w": 2.420844554901123, "rewards_train/2-2": 1.366361141204834, "rewards_train/2-w": 1.710219383239746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.389692008495331, "rewards_train/margins_1": 1.1046769618988037, "rewards_train/margins_2": -0.3438582420349121, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -160.89532470703125, "logps_train/policy_1_l": -158.31649780273438, "logps_train/policy_1_w": -141.6251220703125, "logps_train/policy_2_2": -138.86190795898438, "logps_train/policy_2_w": -165.64645385742188, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.7573435306549072, "rewards_train/1-l": -1.4337491989135742, "rewards_train/1-w": 1.8464725017547607, "rewards_train/2-2": 2.205214500427246, "rewards_train/2-w": 1.3041043281555176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.280221700668335, "rewards_train/margins_1": 0.08912897109985352, "rewards_train/margins_2": 0.9011101722717285, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -234.47752380371094, "logps_train/policy_1_l": -244.7259063720703, "logps_train/policy_1_w": -175.29095458984375, "logps_train/policy_2_2": -190.06832885742188, "logps_train/policy_2_w": -220.00035095214844, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.3795909881591797, "rewards_train/1-l": -1.2593088150024414, "rewards_train/1-w": 1.9709031581878662, "rewards_train/2-2": 2.106450080871582, "rewards_train/2-w": 1.2312145233154297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2302119731903076, "rewards_train/margins_1": 0.5913121700286865, "rewards_train/margins_2": 0.8752355575561523, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -205.29092407226562, "logps_train/policy_1_l": -215.28802490234375, "logps_train/policy_1_w": -178.269775390625, "logps_train/policy_2_2": -174.55557250976562, "logps_train/policy_2_w": -213.9600067138672, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.5740326642990112, "rewards_train/1-l": -1.2860279083251953, "rewards_train/1-w": 1.9431402683258057, "rewards_train/2-2": 2.0389747619628906, "rewards_train/2-w": 1.6899378299713135, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.229168176651001, "rewards_train/margins_1": 0.36910760402679443, "rewards_train/margins_2": 0.34903693199157715, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -145.864501953125, "logps_train/policy_1_l": -187.62228393554688, "logps_train/policy_1_w": -143.45913696289062, "logps_train/policy_2_2": -119.7889633178711, "logps_train/policy_2_w": -177.85916137695312, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.1233148574829102, "rewards_train/1-l": -0.9410372972488403, "rewards_train/1-w": 1.9240093231201172, "rewards_train/2-2": 1.3902443647384644, "rewards_train/2-w": 1.3343966007232666, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8650466203689575, "rewards_train/margins_1": 0.800694465637207, "rewards_train/margins_2": 0.055847764015197754, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -151.16094970703125, "logps_train/policy_1_l": -158.25625610351562, "logps_train/policy_1_w": -147.21922302246094, "logps_train/policy_2_2": -119.59109497070312, "logps_train/policy_2_w": -168.32968139648438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.135467767715454, "rewards_train/1-l": -1.1045318841934204, "rewards_train/1-w": 1.7835460901260376, "rewards_train/2-2": 1.528390645980835, "rewards_train/2-w": 1.4764069318771362, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.888077974319458, "rewards_train/margins_1": 0.6480783224105835, "rewards_train/margins_2": 0.05198371410369873, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -141.50157165527344, "logps_train/policy_1_l": -172.7744140625, "logps_train/policy_1_w": -148.81411743164062, "logps_train/policy_2_2": -110.68234252929688, "logps_train/policy_2_w": -178.77120971679688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.0428357124328613, "rewards_train/1-l": -1.1387454271316528, "rewards_train/1-w": 1.5842127799987793, "rewards_train/2-2": 1.4033479690551758, "rewards_train/2-w": 1.0760037899017334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.722958207130432, "rewards_train/margins_1": 0.541377067565918, "rewards_train/margins_2": 0.3273441791534424, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -159.75634765625, "logps_train/policy_1_l": -83.70435333251953, "logps_train/policy_1_w": -117.60489654541016, "logps_train/policy_2_2": -145.85105895996094, "logps_train/policy_2_w": -132.359130859375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.6899900436401367, "rewards_train/1-l": -0.6235599517822266, "rewards_train/1-w": 1.6887288093566895, "rewards_train/2-2": 1.9492688179016113, "rewards_train/2-w": 1.4406496286392212, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.312288761138916, "rewards_train/margins_1": -0.0012612342834472656, "rewards_train/margins_2": 0.5086191892623901, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -240.07644653320312, "logps_train/policy_1_l": -197.08322143554688, "logps_train/policy_1_w": -141.24337768554688, "logps_train/policy_2_2": -208.26657104492188, "logps_train/policy_2_w": -161.28387451171875, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.0165746212005615, "rewards_train/1-l": -0.5122281908988953, "rewards_train/1-w": 1.2967568635940552, "rewards_train/2-2": 2.813966989517212, "rewards_train/2-w": 1.0388004779815674, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8089850544929504, "rewards_train/margins_1": -0.7198177576065063, "rewards_train/margins_2": 1.7751665115356445, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -135.1487274169922, "logps_train/policy_1_l": -165.54710388183594, "logps_train/policy_1_w": -109.43362426757812, "logps_train/policy_2_2": -122.7576904296875, "logps_train/policy_2_w": -120.4364242553711, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.674971342086792, "rewards_train/1-l": -0.14689825475215912, "rewards_train/1-w": 0.7074185609817505, "rewards_train/2-2": 0.8336056470870972, "rewards_train/2-w": 0.42823246121406555, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.8543168157339096, "rewards_train/margins_1": 0.032447218894958496, "rewards_train/margins_2": 0.4053731858730316, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -123.11672973632812, "logps_train/policy_1_l": -179.5555419921875, "logps_train/policy_1_w": -94.37652587890625, "logps_train/policy_2_2": -105.67623901367188, "logps_train/policy_2_w": -115.00981903076172, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.3555142879486084, "rewards_train/1-l": -1.3371467590332031, "rewards_train/1-w": 1.5490658283233643, "rewards_train/2-2": 1.5198767185211182, "rewards_train/2-w": 1.3130804300308228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8862125873565674, "rewards_train/margins_1": 0.19355154037475586, "rewards_train/margins_2": 0.2067962884902954, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -214.39341735839844, "logps_train/policy_1_l": -190.17227172851562, "logps_train/policy_1_w": -147.15370178222656, "logps_train/policy_2_2": -179.60768127441406, "logps_train/policy_2_w": -182.42193603515625, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.707533597946167, "rewards_train/1-l": -1.2095136642456055, "rewards_train/1-w": 2.1318953037261963, "rewards_train/2-2": 2.4118881225585938, "rewards_train/2-w": 1.8681585788726807, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3414089679718018, "rewards_train/margins_1": 0.4243617057800293, "rewards_train/margins_2": 0.5437295436859131, "step": 41 }, { "epoch": 0.13, "learning_rate": 4.99804827338393e-06, "loss": 0.9689, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -117.66638946533203, "logps_train/policy_1_l": -206.20965576171875, "logps_train/policy_1_w": -176.44979858398438, "logps_train/policy_2_2": -103.40853118896484, "logps_train/policy_2_w": -201.85696411132812, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 0.9224233031272888, "rewards_train/1-l": -1.1537787914276123, "rewards_train/1-w": 2.0745506286621094, "rewards_train/2-2": 1.115006446838379, "rewards_train/2-w": 1.7244608402252197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2283294200897217, "rewards_train/margins_1": 1.1521273255348206, "rewards_train/margins_2": -0.6094543933868408, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -71.27603149414062, "logps_train/policy_1_l": -85.48893737792969, "logps_train/policy_1_w": -93.0847396850586, "logps_train/policy_2_2": -62.60391616821289, "logps_train/policy_2_w": -117.41780853271484, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.4598970413208008, "rewards_train/1-l": -0.5543626546859741, "rewards_train/1-w": 1.321213722229004, "rewards_train/2-2": 0.533456027507782, "rewards_train/2-w": 0.9582191705703735, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 1.875576376914978, "rewards_train/margins_1": 0.8613166809082031, "rewards_train/margins_2": -0.42476314306259155, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -150.40347290039062, "logps_train/policy_1_l": -117.31767272949219, "logps_train/policy_1_w": -103.17141723632812, "logps_train/policy_2_2": -129.27359008789062, "logps_train/policy_2_w": -120.09012603759766, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.4291841983795166, "rewards_train/1-l": -0.8704389333724976, "rewards_train/1-w": 0.7953575849533081, "rewards_train/2-2": 1.6226398944854736, "rewards_train/2-w": 0.5550498962402344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.6657965183258057, "rewards_train/margins_1": -0.6338266134262085, "rewards_train/margins_2": 1.0675899982452393, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -114.1718521118164, "logps_train/policy_1_l": -122.98143005371094, "logps_train/policy_1_w": -115.87000274658203, "logps_train/policy_2_2": -96.88762664794922, "logps_train/policy_2_w": -131.7135772705078, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.9906277656555176, "rewards_train/1-l": -0.6809559464454651, "rewards_train/1-w": 0.5647578835487366, "rewards_train/2-2": 1.1674871444702148, "rewards_train/2-w": 0.2421184480190277, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.2457138299942017, "rewards_train/margins_1": -0.425869882106781, "rewards_train/margins_2": 0.9253686964511871, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -166.87489318847656, "logps_train/policy_1_l": -151.79324340820312, "logps_train/policy_1_w": -130.912353515625, "logps_train/policy_2_2": -145.918701171875, "logps_train/policy_2_w": -155.21823120117188, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.020322322845459, "rewards_train/1-l": -0.940750002861023, "rewards_train/1-w": 0.8368907570838928, "rewards_train/2-2": 1.4112544059753418, "rewards_train/2-w": 0.6539580225944519, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7776407599449158, "rewards_train/margins_1": -0.18343156576156616, "rewards_train/margins_2": 0.7572963833808899, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -188.5735626220703, "logps_train/policy_1_l": -184.58387756347656, "logps_train/policy_1_w": -164.77438354492188, "logps_train/policy_2_2": -164.2362823486328, "logps_train/policy_2_w": -185.20632934570312, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.1860029697418213, "rewards_train/1-l": -0.576356053352356, "rewards_train/1-w": 1.8881855010986328, "rewards_train/2-2": 1.715043544769287, "rewards_train/2-w": 1.7590532302856445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4645415544509888, "rewards_train/margins_1": 0.7021825313568115, "rewards_train/margins_2": -0.04400968551635742, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -212.62657165527344, "logps_train/policy_1_l": -211.94461059570312, "logps_train/policy_1_w": -123.6266098022461, "logps_train/policy_2_2": -180.18280029296875, "logps_train/policy_2_w": -160.08740234375, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.4248430728912354, "rewards_train/1-l": -1.5382122993469238, "rewards_train/1-w": 1.6217141151428223, "rewards_train/2-2": 2.1160943508148193, "rewards_train/2-w": 1.2068839073181152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.159926414489746, "rewards_train/margins_1": 0.19687104225158691, "rewards_train/margins_2": 0.9092104434967041, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -61.96156311035156, "logps_train/policy_1_l": -112.33094787597656, "logps_train/policy_1_w": -88.90164184570312, "logps_train/policy_2_2": -52.566246032714844, "logps_train/policy_2_w": -104.39279174804688, "logps_train/ref_1_2": -69.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -59.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.7046248912811279, "rewards_train/1-l": -0.9289935827255249, "rewards_train/1-w": 1.4024144411087036, "rewards_train/2-2": 0.7177896499633789, "rewards_train/2-w": 1.221072793006897, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.3314080238342285, "rewards_train/margins_1": 0.6977895498275757, "rewards_train/margins_2": -0.5032831430435181, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -189.20721435546875, "logps_train/policy_1_l": -160.80819702148438, "logps_train/policy_1_w": -116.34495544433594, "logps_train/policy_2_2": -171.90916442871094, "logps_train/policy_2_w": -133.1895294189453, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.4792790412902832, "rewards_train/1-l": -1.09175705909729, "rewards_train/1-w": 1.206910252571106, "rewards_train/2-2": 1.9114266633987427, "rewards_train/2-w": 1.1591718196868896, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.298667311668396, "rewards_train/margins_1": -0.27236878871917725, "rewards_train/margins_2": 0.752254843711853, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -136.9957733154297, "logps_train/policy_1_l": -147.52821350097656, "logps_train/policy_1_w": -130.36521911621094, "logps_train/policy_2_2": -103.2625503540039, "logps_train/policy_2_w": -159.564453125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.918391227722168, "rewards_train/1-l": -1.1388570070266724, "rewards_train/1-w": 1.1318374872207642, "rewards_train/2-2": 1.4745266437530518, "rewards_train/2-w": 0.9154291152954102, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2706944942474365, "rewards_train/margins_1": 0.2134462594985962, "rewards_train/margins_2": 0.5590975284576416, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -85.63531494140625, "logps_train/policy_1_l": -93.19120788574219, "logps_train/policy_1_w": -69.44097900390625, "logps_train/policy_2_2": -66.03050231933594, "logps_train/policy_2_w": -88.78897857666016, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.5677181482315063, "rewards_train/1-l": -0.8954880237579346, "rewards_train/1-w": 1.2035586833953857, "rewards_train/2-2": 0.9059338569641113, "rewards_train/2-w": 0.7613365650177002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.0990467071533203, "rewards_train/margins_1": 0.6358405351638794, "rewards_train/margins_2": 0.14459729194641113, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -199.9755859375, "logps_train/policy_1_l": -207.08572387695312, "logps_train/policy_1_w": -222.64564514160156, "logps_train/policy_2_2": -156.6991424560547, "logps_train/policy_2_w": -259.2119445800781, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.0446290969848633, "rewards_train/1-l": -1.1967926025390625, "rewards_train/1-w": 2.296372413635254, "rewards_train/2-2": 2.9363369941711426, "rewards_train/2-w": 1.6397418975830078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4931650161743164, "rewards_train/margins_1": 0.2517433166503906, "rewards_train/margins_2": 1.2965950965881348, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -166.76544189453125, "logps_train/policy_1_l": -164.77294921875, "logps_train/policy_1_w": -137.17665100097656, "logps_train/policy_2_2": -135.92739868164062, "logps_train/policy_2_w": -168.08505249023438, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 0.9195488095283508, "rewards_train/1-l": -1.2155768871307373, "rewards_train/1-w": 2.266709089279175, "rewards_train/2-2": 1.1416347026824951, "rewards_train/2-w": 1.8086826801300049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.482285976409912, "rewards_train/margins_1": 1.347160279750824, "rewards_train/margins_2": -0.6670479774475098, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -144.32644653320312, "logps_train/policy_1_l": -119.15754699707031, "logps_train/policy_1_w": -112.79010009765625, "logps_train/policy_2_2": -123.14471435546875, "logps_train/policy_2_w": -141.01651000976562, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.054855465888977, "rewards_train/1-l": -1.0970048904418945, "rewards_train/1-w": 1.4459891319274902, "rewards_train/2-2": 1.2925599813461304, "rewards_train/2-w": 1.0889739990234375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5429940223693848, "rewards_train/margins_1": 0.3911336660385132, "rewards_train/margins_2": 0.20358598232269287, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -191.6029052734375, "logps_train/policy_1_l": -240.0203857421875, "logps_train/policy_1_w": -247.1307373046875, "logps_train/policy_2_2": -166.5696258544922, "logps_train/policy_2_w": -283.88043212890625, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -227.0, "logps_train/ref_1_w": -270.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 1.7201783657073975, "rewards_train/1-l": -1.3145400285720825, "rewards_train/1-w": 2.371300458908081, "rewards_train/2-2": 2.071943759918213, "rewards_train/2-w": 1.8369556665420532, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6858404874801636, "rewards_train/margins_1": 0.6511220932006836, "rewards_train/margins_2": 0.23498809337615967, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -134.9092254638672, "logps_train/policy_1_l": -133.58383178710938, "logps_train/policy_1_w": -125.65489959716797, "logps_train/policy_2_2": -117.56262969970703, "logps_train/policy_2_w": -156.82955932617188, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.2559529542922974, "rewards_train/1-l": -0.66463303565979, "rewards_train/1-w": 1.893885612487793, "rewards_train/2-2": 1.5281121730804443, "rewards_train/2-w": 1.1732940673828125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.558518648147583, "rewards_train/margins_1": 0.6379326581954956, "rewards_train/margins_2": 0.35481810569763184, "step": 43 }, { "epoch": 0.13, "learning_rate": 4.996950650388179e-06, "loss": 1.0139, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -140.61912536621094, "logps_train/policy_1_l": -90.1231918334961, "logps_train/policy_1_w": -109.55577087402344, "logps_train/policy_2_2": -117.0308837890625, "logps_train/policy_2_w": -128.32325744628906, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.6537119150161743, "rewards_train/1-l": -0.4263814687728882, "rewards_train/1-w": 0.9803599119186401, "rewards_train/2-2": 1.0687867403030396, "rewards_train/2-w": 0.6129873394966125, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.4067413806915283, "rewards_train/margins_1": 0.3266479969024658, "rewards_train/margins_2": 0.455799400806427, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -183.4886474609375, "logps_train/policy_1_l": -211.6165313720703, "logps_train/policy_1_w": -174.83621215820312, "logps_train/policy_2_2": -156.12081909179688, "logps_train/policy_2_w": -210.62649536132812, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.6628544330596924, "rewards_train/1-l": -1.2014970779418945, "rewards_train/1-w": 2.6023178100585938, "rewards_train/2-2": 2.224637746810913, "rewards_train/2-w": 2.062349796295166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8038148880004883, "rewards_train/margins_1": 0.9394633769989014, "rewards_train/margins_2": 0.16228795051574707, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -114.74703216552734, "logps_train/policy_1_l": -149.84326171875, "logps_train/policy_1_w": -73.49546813964844, "logps_train/policy_2_2": -102.87376403808594, "logps_train/policy_2_w": -78.30464935302734, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 0.5407267808914185, "rewards_train/1-l": -0.7857893705368042, "rewards_train/1-w": 0.6002583503723145, "rewards_train/2-2": 0.5919204950332642, "rewards_train/2-w": 0.6328158378601074, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.3860477209091187, "rewards_train/margins_1": 0.059531569480895996, "rewards_train/margins_2": -0.04089534282684326, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -109.1158676147461, "logps_train/policy_1_l": -121.48892211914062, "logps_train/policy_1_w": -74.07699584960938, "logps_train/policy_2_2": -96.20632934570312, "logps_train/policy_2_w": -89.20263671875, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 0.676011323928833, "rewards_train/1-l": -1.1244782209396362, "rewards_train/1-w": 0.9883942604064941, "rewards_train/2-2": 0.610129177570343, "rewards_train/2-w": 0.6305174231529236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.1128724813461304, "rewards_train/margins_1": 0.31238293647766113, "rewards_train/margins_2": -0.020388245582580566, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -120.14152526855469, "logps_train/policy_1_l": -93.87134552001953, "logps_train/policy_1_w": -145.21043395996094, "logps_train/policy_2_2": -101.81468200683594, "logps_train/policy_2_w": -172.4955291748047, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.7178791165351868, "rewards_train/1-l": -0.5519782900810242, "rewards_train/1-w": 1.1477071046829224, "rewards_train/2-2": 0.7420673370361328, "rewards_train/2-w": 0.7629477977752686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.6996853947639465, "rewards_train/margins_1": 0.4298279881477356, "rewards_train/margins_2": -0.020880460739135742, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -76.61611938476562, "logps_train/policy_1_l": -93.59523010253906, "logps_train/policy_1_w": -74.44471740722656, "logps_train/policy_2_2": -62.104766845703125, "logps_train/policy_2_w": -89.53533935546875, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -88.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.7133879065513611, "rewards_train/1-l": -0.5343271493911743, "rewards_train/1-w": 0.9832625389099121, "rewards_train/2-2": 0.6988986730575562, "rewards_train/2-w": 0.5277156233787537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.5175896883010864, "rewards_train/margins_1": 0.269874632358551, "rewards_train/margins_2": 0.1711830496788025, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -133.4314422607422, "logps_train/policy_1_l": -127.04130554199219, "logps_train/policy_1_w": -121.17691802978516, "logps_train/policy_2_2": -101.00669860839844, "logps_train/policy_2_w": -159.82632446289062, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.1131060123443604, "rewards_train/1-l": -1.2597943544387817, "rewards_train/1-w": 1.724495768547058, "rewards_train/2-2": 1.6415166854858398, "rewards_train/2-w": 1.1568195819854736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.98429012298584, "rewards_train/margins_1": 0.6113897562026978, "rewards_train/margins_2": 0.4846971035003662, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -51.87240982055664, "logps_train/policy_1_l": -98.92547607421875, "logps_train/policy_1_w": -75.887451171875, "logps_train/policy_2_2": -46.094608306884766, "logps_train/policy_2_w": -80.79595184326172, "logps_train/ref_1_2": -57.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -53.5, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.5768213868141174, "rewards_train/1-l": -0.7587587833404541, "rewards_train/1-w": 0.710473895072937, "rewards_train/2-2": 0.7241330146789551, "rewards_train/2-w": 0.6141551733016968, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.4692326784133911, "rewards_train/margins_1": 0.13365250825881958, "rewards_train/margins_2": 0.1099778413772583, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -125.2532958984375, "logps_train/policy_1_l": -166.05441284179688, "logps_train/policy_1_w": -136.02935791015625, "logps_train/policy_2_2": -109.6392593383789, "logps_train/policy_2_w": -160.6210174560547, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.7418575286865234, "rewards_train/1-l": -1.1913793087005615, "rewards_train/1-w": 1.5001901388168335, "rewards_train/2-2": 0.9532616138458252, "rewards_train/2-w": 0.9753990173339844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.691569447517395, "rewards_train/margins_1": 0.7583326101303101, "rewards_train/margins_2": -0.02213740348815918, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -111.05029296875, "logps_train/policy_1_l": -183.69070434570312, "logps_train/policy_1_w": -141.8924560546875, "logps_train/policy_2_2": -88.26921081542969, "logps_train/policy_2_w": -186.00830078125, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.129346251487732, "rewards_train/1-l": -1.549257516860962, "rewards_train/1-w": 1.9142696857452393, "rewards_train/2-2": 1.3340160846710205, "rewards_train/2-w": 1.169481873512268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.463527202606201, "rewards_train/margins_1": 0.7849234342575073, "rewards_train/margins_2": 0.16453421115875244, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -131.11734008789062, "logps_train/policy_1_l": -185.44247436523438, "logps_train/policy_1_w": -149.48631286621094, "logps_train/policy_2_2": -114.48312377929688, "logps_train/policy_2_w": -171.01028442382812, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.9420253038406372, "rewards_train/1-l": -1.4965925216674805, "rewards_train/1-w": 1.4466814994812012, "rewards_train/2-2": 1.24924635887146, "rewards_train/2-w": 1.4349091053009033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.9432740211486816, "rewards_train/margins_1": 0.504656195640564, "rewards_train/margins_2": -0.18566274642944336, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -142.86050415039062, "logps_train/policy_1_l": -159.52037048339844, "logps_train/policy_1_w": -75.76943969726562, "logps_train/policy_2_2": -112.98347473144531, "logps_train/policy_2_w": -106.87464904785156, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.0932457447052002, "rewards_train/1-l": -1.543149709701538, "rewards_train/1-w": 1.2871180772781372, "rewards_train/2-2": 1.547844409942627, "rewards_train/2-w": 0.9687848687171936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8302677869796753, "rewards_train/margins_1": 0.193872332572937, "rewards_train/margins_2": 0.5790595412254333, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -95.13321685791016, "logps_train/policy_1_l": -105.06156158447266, "logps_train/policy_1_w": -111.64752197265625, "logps_train/policy_2_2": -80.48966217041016, "logps_train/policy_2_w": -132.88088989257812, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 0.5608973503112793, "rewards_train/1-l": -0.6213904619216919, "rewards_train/1-w": 0.7910101413726807, "rewards_train/2-2": 0.7190027832984924, "rewards_train/2-w": 0.4357401430606842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.4124006032943726, "rewards_train/margins_1": 0.23011279106140137, "rewards_train/margins_2": 0.2832626402378082, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -80.01667785644531, "logps_train/policy_1_l": -123.08866882324219, "logps_train/policy_1_w": -89.29867553710938, "logps_train/policy_2_2": -61.619720458984375, "logps_train/policy_2_w": -109.63750457763672, "logps_train/ref_1_2": -82.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -66.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.2342696487903595, "rewards_train/1-l": -0.9018353819847107, "rewards_train/1-w": 1.3591943979263306, "rewards_train/2-2": 0.4591217041015625, "rewards_train/2-w": 1.1221866607666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.2610297799110413, "rewards_train/margins_1": 1.124924749135971, "rewards_train/margins_2": -0.6630649566650391, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -259.87274169921875, "logps_train/policy_1_l": -182.02053833007812, "logps_train/policy_1_w": -140.81948852539062, "logps_train/policy_2_2": -227.47283935546875, "logps_train/policy_2_w": -176.927490234375, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.9994447231292725, "rewards_train/1-l": -1.1883810758590698, "rewards_train/1-w": 1.5930508375167847, "rewards_train/2-2": 2.7784981727600098, "rewards_train/2-w": 1.1228762865066528, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7814319133758545, "rewards_train/margins_1": -0.4063938856124878, "rewards_train/margins_2": 1.655621886253357, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -76.062744140625, "logps_train/policy_1_l": -63.20858383178711, "logps_train/policy_1_w": -81.65281677246094, "logps_train/policy_2_2": -61.8368034362793, "logps_train/policy_2_w": -99.33232116699219, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 0.5734129548072815, "rewards_train/1-l": -0.46480369567871094, "rewards_train/1-w": 0.703468918800354, "rewards_train/2-2": 0.6467884182929993, "rewards_train/2-w": 0.44957998394966125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.168272614479065, "rewards_train/margins_1": 0.1300559639930725, "rewards_train/margins_2": 0.197208434343338, "step": 45 }, { "epoch": 0.14, "learning_rate": 4.9956093294108046e-06, "loss": 1.0813, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -123.3262939453125, "logps_train/policy_1_l": -130.66506958007812, "logps_train/policy_1_w": -166.95028686523438, "logps_train/policy_2_2": -96.44287109375, "logps_train/policy_2_w": -199.9384307861328, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 0.9079962968826294, "rewards_train/1-l": -0.9333045482635498, "rewards_train/1-w": 2.0948143005371094, "rewards_train/2-2": 1.3061037063598633, "rewards_train/2-w": 1.3678759336471558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.028118848800659, "rewards_train/margins_1": 1.18681800365448, "rewards_train/margins_2": -0.06177222728729248, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -128.57130432128906, "logps_train/policy_1_l": -150.15220642089844, "logps_train/policy_1_w": -108.49669647216797, "logps_train/policy_2_2": -116.07030487060547, "logps_train/policy_2_w": -121.17910766601562, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.2788069248199463, "rewards_train/1-l": -0.9296736717224121, "rewards_train/1-w": 0.8523812294006348, "rewards_train/2-2": 1.482226848602295, "rewards_train/2-w": 0.7969334125518799, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7820549011230469, "rewards_train/margins_1": -0.4264256954193115, "rewards_train/margins_2": 0.685293436050415, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -136.1864013671875, "logps_train/policy_1_l": -157.52227783203125, "logps_train/policy_1_w": -120.25186157226562, "logps_train/policy_2_2": -106.31804656982422, "logps_train/policy_2_w": -140.03411865234375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.6536245942115784, "rewards_train/1-l": -1.629570484161377, "rewards_train/1-w": 1.130283236503601, "rewards_train/2-2": 0.9431957006454468, "rewards_train/2-w": 0.8020562529563904, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.759853720664978, "rewards_train/margins_1": 0.4766586422920227, "rewards_train/margins_2": 0.1411394476890564, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -143.3551483154297, "logps_train/policy_1_l": -79.88740539550781, "logps_train/policy_1_w": -157.94076538085938, "logps_train/policy_2_2": -129.12118530273438, "logps_train/policy_2_w": -187.50164794921875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.994953989982605, "rewards_train/1-l": -0.3347364068031311, "rewards_train/1-w": 2.3996739387512207, "rewards_train/2-2": 1.2980382442474365, "rewards_train/2-w": 1.5779600143432617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.734410345554352, "rewards_train/margins_1": 1.4047199487686157, "rewards_train/margins_2": -0.2799217700958252, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -105.0327377319336, "logps_train/policy_1_l": -61.288719177246094, "logps_train/policy_1_w": -89.76518249511719, "logps_train/policy_2_2": -88.79258728027344, "logps_train/policy_2_w": -121.31512451171875, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.6328834295272827, "rewards_train/1-l": -0.4314107894897461, "rewards_train/1-w": 2.0152781009674072, "rewards_train/2-2": 1.0552634000778198, "rewards_train/2-w": 1.4532520771026611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4466888904571533, "rewards_train/margins_1": 1.3823946714401245, "rewards_train/margins_2": -0.3979886770248413, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -101.69983673095703, "logps_train/policy_1_l": -100.7948226928711, "logps_train/policy_1_w": -114.85578918457031, "logps_train/policy_2_2": -83.84947204589844, "logps_train/policy_2_w": -136.19854736328125, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.631578803062439, "rewards_train/1-l": -0.8150293827056885, "rewards_train/1-w": 1.2546072006225586, "rewards_train/2-2": 0.8939592838287354, "rewards_train/2-w": 0.9819028973579407, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.069636583328247, "rewards_train/margins_1": 0.6230283975601196, "rewards_train/margins_2": -0.08794361352920532, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -219.82093811035156, "logps_train/policy_1_l": -179.5806121826172, "logps_train/policy_1_w": -116.91116333007812, "logps_train/policy_2_2": -181.34645080566406, "logps_train/policy_2_w": -140.2646484375, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.4858744144439697, "rewards_train/1-l": -1.403374195098877, "rewards_train/1-w": 1.3010716438293457, "rewards_train/2-2": 2.226292610168457, "rewards_train/2-w": 1.0219731330871582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7044458389282227, "rewards_train/margins_1": -0.18480277061462402, "rewards_train/margins_2": 1.2043194770812988, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -182.94708251953125, "logps_train/policy_1_l": -168.55076599121094, "logps_train/policy_1_w": -195.1292724609375, "logps_train/policy_2_2": -146.73843383789062, "logps_train/policy_2_w": -248.3243408203125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.9334162473678589, "rewards_train/1-l": -1.749608039855957, "rewards_train/1-w": 2.693321704864502, "rewards_train/2-2": 2.443342685699463, "rewards_train/2-w": 1.451939344406128, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.442929744720459, "rewards_train/margins_1": 0.7599054574966431, "rewards_train/margins_2": 0.991403341293335, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -149.3001708984375, "logps_train/policy_1_l": -109.94811248779297, "logps_train/policy_1_w": -102.85882568359375, "logps_train/policy_2_2": -116.59745025634766, "logps_train/policy_2_w": -125.5517807006836, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.585608720779419, "rewards_train/1-l": -0.6840689182281494, "rewards_train/1-w": 1.2959532737731934, "rewards_train/2-2": 1.0527549982070923, "rewards_train/2-w": 0.9846655130386353, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9800221920013428, "rewards_train/margins_1": 0.7103445529937744, "rewards_train/margins_2": 0.06808948516845703, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -188.7311553955078, "logps_train/policy_1_l": -204.96170043945312, "logps_train/policy_1_w": -180.62701416015625, "logps_train/policy_2_2": -164.51583862304688, "logps_train/policy_2_w": -226.20045471191406, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.480790615081787, "rewards_train/1-l": -1.6492955684661865, "rewards_train/1-w": 1.9357354640960693, "rewards_train/2-2": 1.7241971492767334, "rewards_train/2-w": 1.0283925533294678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.585031032562256, "rewards_train/margins_1": 0.4549448490142822, "rewards_train/margins_2": 0.6958045959472656, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -198.98648071289062, "logps_train/policy_1_l": -99.05186462402344, "logps_train/policy_1_w": -95.58229064941406, "logps_train/policy_2_2": -182.41702270507812, "logps_train/policy_2_w": -119.99365234375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.074007511138916, "rewards_train/1-l": -0.7462514042854309, "rewards_train/1-w": 1.4331769943237305, "rewards_train/2-2": 1.4707984924316406, "rewards_train/2-w": 1.1162594556808472, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1794283986091614, "rewards_train/margins_1": 0.35916948318481445, "rewards_train/margins_2": 0.35453903675079346, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -222.98643493652344, "logps_train/policy_1_l": -229.47454833984375, "logps_train/policy_1_w": -143.14422607421875, "logps_train/policy_2_2": -192.53831481933594, "logps_train/policy_2_w": -181.04876708984375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.2564343214035034, "rewards_train/1-l": -2.0726513862609863, "rewards_train/1-w": 1.9914356470108032, "rewards_train/2-2": 1.6979267597198486, "rewards_train/2-w": 1.4826219081878662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.0640870332717896, "rewards_train/margins_1": 0.7350013256072998, "rewards_train/margins_2": 0.21530485153198242, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -81.67896270751953, "logps_train/policy_1_l": -51.85845184326172, "logps_train/policy_1_w": -55.99812316894531, "logps_train/policy_2_2": -69.68389892578125, "logps_train/policy_2_w": -77.91714477539062, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -48.5, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.38425180315971375, "rewards_train/1-l": -0.3227590322494507, "rewards_train/1-w": 0.8163983821868896, "rewards_train/2-2": 0.4222354292869568, "rewards_train/2-w": 0.5371922254562378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.1391574144363403, "rewards_train/margins_1": 0.4321465790271759, "rewards_train/margins_2": -0.114956796169281, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -225.708984375, "logps_train/policy_1_l": -212.08876037597656, "logps_train/policy_1_w": -196.42440795898438, "logps_train/policy_2_2": -188.98324584960938, "logps_train/policy_2_w": -225.77117919921875, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 2.1697278022766113, "rewards_train/1-l": -1.2948130369186401, "rewards_train/1-w": 2.245060920715332, "rewards_train/2-2": 2.670427083969116, "rewards_train/2-w": 1.5103830099105835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.539873957633972, "rewards_train/margins_1": 0.0753331184387207, "rewards_train/margins_2": 1.1600440740585327, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -91.817626953125, "logps_train/policy_1_l": -78.60691833496094, "logps_train/policy_1_w": -91.2573013305664, "logps_train/policy_2_2": -78.841552734375, "logps_train/policy_2_w": -108.39515686035156, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.8198002576828003, "rewards_train/1-l": -0.17787881195545197, "rewards_train/1-w": 0.6924335956573486, "rewards_train/2-2": 1.0924079418182373, "rewards_train/2-w": 0.4128279685974121, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 0.8703124076128006, "rewards_train/margins_1": -0.12736666202545166, "rewards_train/margins_2": 0.6795799732208252, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -130.393310546875, "logps_train/policy_1_l": -182.07870483398438, "logps_train/policy_1_w": -149.7914276123047, "logps_train/policy_2_2": -115.91675567626953, "logps_train/policy_2_w": -174.50537109375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.952953577041626, "rewards_train/1-l": -1.3313082456588745, "rewards_train/1-w": 1.7302322387695312, "rewards_train/2-2": 0.9698482155799866, "rewards_train/2-w": 1.3994624614715576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.0615404844284058, "rewards_train/margins_1": 0.7772786617279053, "rewards_train/margins_2": -0.42961424589157104, "step": 47 }, { "epoch": 0.14, "learning_rate": 4.994024441362366e-06, "loss": 0.9673, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -85.75460815429688, "logps_train/policy_1_l": -43.69265365600586, "logps_train/policy_1_w": -67.42869567871094, "logps_train/policy_2_2": -68.59798431396484, "logps_train/policy_2_w": -83.71471405029297, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 0.6447609663009644, "rewards_train/1-l": -0.14894665777683258, "rewards_train/1-w": 0.7504956722259521, "rewards_train/2-2": 0.8128091096878052, "rewards_train/2-w": 0.36871397495269775, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.8994423300027847, "rewards_train/margins_1": 0.10573470592498779, "rewards_train/margins_2": 0.4440951347351074, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -237.85800170898438, "logps_train/policy_1_l": -165.668701171875, "logps_train/policy_1_w": -162.01260375976562, "logps_train/policy_2_2": -203.02679443359375, "logps_train/policy_2_w": -197.32925415039062, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.8517007231712341, "rewards_train/1-l": -1.116870403289795, "rewards_train/1-w": 1.8549891710281372, "rewards_train/2-2": 1.39732027053833, "rewards_train/2-w": 1.242073655128479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.971859574317932, "rewards_train/margins_1": 1.003288447856903, "rewards_train/margins_2": 0.15524661540985107, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -229.2822265625, "logps_train/policy_1_l": -172.04052734375, "logps_train/policy_1_w": -146.76187133789062, "logps_train/policy_2_2": -195.3127899169922, "logps_train/policy_2_w": -178.37548828125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.584277868270874, "rewards_train/1-l": -1.024366021156311, "rewards_train/1-w": 1.9706881046295166, "rewards_train/2-2": 2.0937204360961914, "rewards_train/2-w": 1.6437016725540161, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9950541257858276, "rewards_train/margins_1": 0.3864102363586426, "rewards_train/margins_2": 0.4500187635421753, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -220.89859008789062, "logps_train/policy_1_l": -248.74575805664062, "logps_train/policy_1_w": -177.32662963867188, "logps_train/policy_2_2": -185.62991333007812, "logps_train/policy_2_w": -233.2589874267578, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 0.993735671043396, "rewards_train/1-l": -2.214808940887451, "rewards_train/1-w": 2.2517120838165283, "rewards_train/2-2": 1.6799774169921875, "rewards_train/2-w": 1.3116002082824707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.4665210247039795, "rewards_train/margins_1": 1.2579764127731323, "rewards_train/margins_2": 0.3683772087097168, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -140.40682983398438, "logps_train/policy_1_l": -137.5308074951172, "logps_train/policy_1_w": -89.44683074951172, "logps_train/policy_2_2": -109.74561309814453, "logps_train/policy_2_w": -110.41384887695312, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.2671303749084473, "rewards_train/1-l": -1.2077687978744507, "rewards_train/1-w": 0.7704048156738281, "rewards_train/2-2": 1.9402825832366943, "rewards_train/2-w": 0.5472878217697144, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9781736135482788, "rewards_train/margins_1": -0.49672555923461914, "rewards_train/margins_2": 1.39299476146698, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -144.01568603515625, "logps_train/policy_1_l": -159.1154022216797, "logps_train/policy_1_w": -79.56246948242188, "logps_train/policy_2_2": -133.86236572265625, "logps_train/policy_2_w": -95.45193481445312, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.3640556335449219, "rewards_train/1-l": -0.8117364645004272, "rewards_train/1-w": 0.646194577217102, "rewards_train/2-2": 1.59657621383667, "rewards_train/2-w": 0.25666213035583496, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.4579310417175293, "rewards_train/margins_1": -0.7178610563278198, "rewards_train/margins_2": 1.339914083480835, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -196.21533203125, "logps_train/policy_1_l": -201.40771484375, "logps_train/policy_1_w": -155.775390625, "logps_train/policy_2_2": -157.43206787109375, "logps_train/policy_2_w": -198.63003540039062, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.9003416299819946, "rewards_train/1-l": -1.0117918252944946, "rewards_train/1-w": 1.7119147777557373, "rewards_train/2-2": 1.219293475151062, "rewards_train/2-w": 0.9948081374168396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.723706603050232, "rewards_train/margins_1": 0.8115731477737427, "rewards_train/margins_2": 0.2244853377342224, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -141.06781005859375, "logps_train/policy_1_l": -182.53915405273438, "logps_train/policy_1_w": -147.5817108154297, "logps_train/policy_2_2": -117.86155700683594, "logps_train/policy_2_w": -183.4361114501953, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.5369696617126465, "rewards_train/1-l": -1.6001077890396118, "rewards_train/1-w": 1.8171215057373047, "rewards_train/2-2": 1.7888448238372803, "rewards_train/2-w": 1.0446710586547852, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4172292947769165, "rewards_train/margins_1": 0.2801518440246582, "rewards_train/margins_2": 0.7441737651824951, "step": 48 }, { "epoch": 0.15, "logps_train/policy_1_2": -198.9268798828125, "logps_train/policy_1_l": -166.20501708984375, "logps_train/policy_1_w": -171.172607421875, "logps_train/policy_2_2": -162.9403076171875, "logps_train/policy_2_w": -207.1513214111328, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.1823128461837769, "rewards_train/1-l": -1.8443286418914795, "rewards_train/1-w": 1.988988995552063, "rewards_train/2-2": 1.8747204542160034, "rewards_train/2-w": 1.3567428588867188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8333176374435425, "rewards_train/margins_1": 0.8066761493682861, "rewards_train/margins_2": 0.5179775953292847, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -205.15354919433594, "logps_train/policy_1_l": -154.6766815185547, "logps_train/policy_1_w": -195.77911376953125, "logps_train/policy_2_2": -175.6552734375, "logps_train/policy_2_w": -233.25161743164062, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.4721453189849854, "rewards_train/1-l": -1.5134700536727905, "rewards_train/1-w": 1.4556822776794434, "rewards_train/2-2": 2.0532212257385254, "rewards_train/2-w": 0.7146827578544617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.969152331352234, "rewards_train/margins_1": -0.016463041305541992, "rewards_train/margins_2": 1.3385384678840637, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -124.44364929199219, "logps_train/policy_1_l": -64.40637969970703, "logps_train/policy_1_w": -51.72660827636719, "logps_train/policy_2_2": -101.75686645507812, "logps_train/policy_2_w": -67.4456787109375, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -62.0, "logps_train/ref_1_w": -63.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 0.806465744972229, "rewards_train/1-l": -0.254993200302124, "rewards_train/1-w": 1.1141068935394287, "rewards_train/2-2": 1.2525354623794556, "rewards_train/2-w": 0.6094363331794739, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.3691000938415527, "rewards_train/margins_1": 0.3076411485671997, "rewards_train/margins_2": 0.6430991291999817, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -219.00352478027344, "logps_train/policy_1_l": -274.525390625, "logps_train/policy_1_w": -168.555908203125, "logps_train/policy_2_2": -185.58084106445312, "logps_train/policy_2_w": -206.87503051757812, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.6457411050796509, "rewards_train/1-l": -2.7619147300720215, "rewards_train/1-w": 2.7694098949432373, "rewards_train/2-2": 1.8667190074920654, "rewards_train/2-w": 2.2249956130981445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.531324625015259, "rewards_train/margins_1": 1.1236687898635864, "rewards_train/margins_2": -0.3582766056060791, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -132.74205017089844, "logps_train/policy_1_l": -128.43228149414062, "logps_train/policy_1_w": -100.21229553222656, "logps_train/policy_2_2": -117.6690444946289, "logps_train/policy_2_w": -113.83213806152344, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 1.1140758991241455, "rewards_train/1-l": -0.9332664012908936, "rewards_train/1-w": 1.5100207328796387, "rewards_train/2-2": 1.3416895866394043, "rewards_train/2-w": 1.1542857885360718, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4432871341705322, "rewards_train/margins_1": 0.39594483375549316, "rewards_train/margins_2": 0.18740379810333252, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -158.1065216064453, "logps_train/policy_1_l": -175.5547637939453, "logps_train/policy_1_w": -105.01597595214844, "logps_train/policy_2_2": -130.37347412109375, "logps_train/policy_2_w": -133.91012573242188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.654191255569458, "rewards_train/1-l": -1.0919997692108154, "rewards_train/1-w": 1.7054333686828613, "rewards_train/2-2": 2.0392160415649414, "rewards_train/2-w": 1.1902368068695068, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7974331378936768, "rewards_train/margins_1": 0.05124211311340332, "rewards_train/margins_2": 0.8489792346954346, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -193.9059600830078, "logps_train/policy_1_l": -181.0946807861328, "logps_train/policy_1_w": -134.2769775390625, "logps_train/policy_2_2": -170.9629669189453, "logps_train/policy_2_w": -150.7751007080078, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5039355754852295, "rewards_train/1-l": -1.8529245853424072, "rewards_train/1-w": 2.055894374847412, "rewards_train/2-2": 1.9716718196868896, "rewards_train/2-w": 1.677177906036377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.9088189601898193, "rewards_train/margins_1": 0.5519587993621826, "rewards_train/margins_2": 0.2944939136505127, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -137.318603515625, "logps_train/policy_1_l": -81.47794342041016, "logps_train/policy_1_w": -114.41792297363281, "logps_train/policy_2_2": -119.66828918457031, "logps_train/policy_2_w": -134.3423309326172, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.0947012901306152, "rewards_train/1-l": -0.11185680329799652, "rewards_train/1-w": 1.262114405632019, "rewards_train/2-2": 1.2491868734359741, "rewards_train/2-w": 0.6868606209754944, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3739712089300156, "rewards_train/margins_1": 0.1674131155014038, "rewards_train/margins_2": 0.5623262524604797, "step": 49 }, { "epoch": 0.15, "learning_rate": 4.9921961409251465e-06, "loss": 0.9116, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -127.31770324707031, "logps_train/policy_1_l": -140.82949829101562, "logps_train/policy_1_w": -129.1451416015625, "logps_train/policy_2_2": -109.53372192382812, "logps_train/policy_2_w": -157.62423706054688, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.3807295560836792, "rewards_train/1-l": -1.1626373529434204, "rewards_train/1-w": 1.2972040176391602, "rewards_train/2-2": 1.7388155460357666, "rewards_train/2-w": 0.9453881978988647, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4598413705825806, "rewards_train/margins_1": -0.08352553844451904, "rewards_train/margins_2": 0.7934273481369019, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -121.92219543457031, "logps_train/policy_1_l": -137.08973693847656, "logps_train/policy_1_w": -76.50269317626953, "logps_train/policy_2_2": -107.4617691040039, "logps_train/policy_2_w": -89.74473571777344, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.2374680042266846, "rewards_train/1-l": -1.1472548246383667, "rewards_train/1-w": 1.1735587120056152, "rewards_train/2-2": 1.4585108757019043, "rewards_train/2-w": 0.9766985177993774, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.320813536643982, "rewards_train/margins_1": -0.06390929222106934, "rewards_train/margins_2": 0.48181235790252686, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -222.43148803710938, "logps_train/policy_1_l": -160.60394287109375, "logps_train/policy_1_w": -187.09487915039062, "logps_train/policy_2_2": -198.9100341796875, "logps_train/policy_2_w": -203.94351196289062, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8193514347076416, "rewards_train/1-l": -1.0517992973327637, "rewards_train/1-w": 1.6850439310073853, "rewards_train/2-2": 2.3511831760406494, "rewards_train/2-w": 1.296273112297058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.736843228340149, "rewards_train/margins_1": -0.13430750370025635, "rewards_train/margins_2": 1.0549100637435913, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -133.44192504882812, "logps_train/policy_1_l": -230.1332244873047, "logps_train/policy_1_w": -146.68472290039062, "logps_train/policy_2_2": -106.22052001953125, "logps_train/policy_2_w": -190.36868286132812, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.9517068266868591, "rewards_train/1-l": -1.7928149700164795, "rewards_train/1-w": 1.9518401622772217, "rewards_train/2-2": 1.3162294626235962, "rewards_train/2-w": 1.55922532081604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.744655132293701, "rewards_train/margins_1": 1.0001333355903625, "rewards_train/margins_2": -0.24299585819244385, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -276.62860107421875, "logps_train/policy_1_l": -291.8389892578125, "logps_train/policy_1_w": -221.7581329345703, "logps_train/policy_2_2": -242.43814086914062, "logps_train/policy_2_w": -258.1946105957031, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -264.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 1.8246396780014038, "rewards_train/1-l": -2.271397113800049, "rewards_train/1-w": 1.4741877317428589, "rewards_train/2-2": 2.281186819076538, "rewards_train/2-w": 1.3680391311645508, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7455848455429077, "rewards_train/margins_1": -0.3504519462585449, "rewards_train/margins_2": 0.9131476879119873, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -227.11004638671875, "logps_train/policy_1_l": -162.94235229492188, "logps_train/policy_1_w": -161.81863403320312, "logps_train/policy_2_2": -186.78814697265625, "logps_train/policy_2_w": -192.5791778564453, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.2171201705932617, "rewards_train/1-l": -1.0678671598434448, "rewards_train/1-w": 1.6118872165679932, "rewards_train/2-2": 2.249309539794922, "rewards_train/2-w": 1.0842702388763428, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.679754376411438, "rewards_train/margins_1": 0.39476704597473145, "rewards_train/margins_2": 1.165039300918579, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -211.41201782226562, "logps_train/policy_1_l": -231.18104553222656, "logps_train/policy_1_w": -200.71200561523438, "logps_train/policy_2_2": -187.0320281982422, "logps_train/policy_2_w": -243.30374145507812, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.9650492668151855, "rewards_train/1-l": -2.317322015762329, "rewards_train/1-w": 2.1194257736206055, "rewards_train/2-2": 2.5092978477478027, "rewards_train/2-w": 1.4758747816085815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.436747789382935, "rewards_train/margins_1": 0.15437650680541992, "rewards_train/margins_2": 1.0334230661392212, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -160.19635009765625, "logps_train/policy_1_l": -121.66166687011719, "logps_train/policy_1_w": -174.30230712890625, "logps_train/policy_2_2": -131.14389038085938, "logps_train/policy_2_w": -204.4107666015625, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.892084538936615, "rewards_train/1-l": -0.7552293539047241, "rewards_train/1-w": 2.463520050048828, "rewards_train/2-2": 1.5176424980163574, "rewards_train/2-w": 1.7464241981506348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2187494039535522, "rewards_train/margins_1": 1.5714355111122131, "rewards_train/margins_2": -0.22878170013427734, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -111.5613021850586, "logps_train/policy_1_l": -194.41070556640625, "logps_train/policy_1_w": -155.36099243164062, "logps_train/policy_2_2": -101.03168487548828, "logps_train/policy_2_w": -170.22171020507812, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.6969948410987854, "rewards_train/1-l": -1.1691968441009521, "rewards_train/1-w": 1.2158523797988892, "rewards_train/2-2": 0.7382377982139587, "rewards_train/2-w": 0.950486421585083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3850492238998413, "rewards_train/margins_1": 0.5188575387001038, "rewards_train/margins_2": -0.21224862337112427, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -139.39588928222656, "logps_train/policy_1_l": -151.557373046875, "logps_train/policy_1_w": -67.36135864257812, "logps_train/policy_2_2": -109.06993103027344, "logps_train/policy_2_w": -87.43740844726562, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 0.5385361909866333, "rewards_train/1-l": -1.6268320083618164, "rewards_train/1-w": 0.9490203857421875, "rewards_train/2-2": 1.4109758138656616, "rewards_train/2-w": 0.6029388308525085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.575852394104004, "rewards_train/margins_1": 0.4104841947555542, "rewards_train/margins_2": 0.8080369830131531, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -201.8369140625, "logps_train/policy_1_l": -198.93942260742188, "logps_train/policy_1_w": -172.5494842529297, "logps_train/policy_2_2": -175.92510986328125, "logps_train/policy_2_w": -202.03268432617188, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.4131836891174316, "rewards_train/1-l": -0.998629093170166, "rewards_train/1-w": 1.8903632164001465, "rewards_train/2-2": 1.919206976890564, "rewards_train/2-w": 1.4436061382293701, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8889923095703125, "rewards_train/margins_1": 0.47717952728271484, "rewards_train/margins_2": 0.47560083866119385, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -140.6614227294922, "logps_train/policy_1_l": -123.13885498046875, "logps_train/policy_1_w": -123.75816345214844, "logps_train/policy_2_2": -113.97052764892578, "logps_train/policy_2_w": -139.21487426757812, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2737021446228027, "rewards_train/1-l": -0.4552916884422302, "rewards_train/1-w": 1.438246726989746, "rewards_train/2-2": 1.761541724205017, "rewards_train/2-w": 1.0660122632980347, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8935384154319763, "rewards_train/margins_1": 0.16454458236694336, "rewards_train/margins_2": 0.6955294609069824, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -90.15120697021484, "logps_train/policy_1_l": -98.55746459960938, "logps_train/policy_1_w": -97.74897003173828, "logps_train/policy_2_2": -76.18567657470703, "logps_train/policy_2_w": -121.12744140625, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.38058263063430786, "rewards_train/1-l": -1.14363694190979, "rewards_train/1-w": 1.1157286167144775, "rewards_train/2-2": 0.5888542532920837, "rewards_train/2-w": 0.7935065031051636, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.2593655586242676, "rewards_train/margins_1": 0.7351459860801697, "rewards_train/margins_2": -0.20465224981307983, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -143.45382690429688, "logps_train/policy_1_l": -127.79624938964844, "logps_train/policy_1_w": -113.21221160888672, "logps_train/policy_2_2": -122.17880249023438, "logps_train/policy_2_w": -134.00180053710938, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.9225858449935913, "rewards_train/1-l": -0.8124371767044067, "rewards_train/1-w": 2.161591053009033, "rewards_train/2-2": 1.374698519706726, "rewards_train/2-w": 1.7701330184936523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.97402822971344, "rewards_train/margins_1": 1.239005208015442, "rewards_train/margins_2": -0.39543449878692627, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -188.75904846191406, "logps_train/policy_1_l": -212.3402557373047, "logps_train/policy_1_w": -161.12733459472656, "logps_train/policy_2_2": -162.39306640625, "logps_train/policy_2_w": -204.58811950683594, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6233139038085938, "rewards_train/1-l": -1.8560972213745117, "rewards_train/1-w": 1.2747666835784912, "rewards_train/2-2": 2.0520987510681152, "rewards_train/2-w": 0.5849381685256958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.130863904953003, "rewards_train/margins_1": -0.34854722023010254, "rewards_train/margins_2": 1.4671605825424194, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -151.80270385742188, "logps_train/policy_1_l": -157.03939819335938, "logps_train/policy_1_w": -117.93225860595703, "logps_train/policy_2_2": -133.00033569335938, "logps_train/policy_2_w": -142.61703491210938, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.3197298049926758, "rewards_train/1-l": -1.48518967628479, "rewards_train/1-w": 1.8130241632461548, "rewards_train/2-2": 1.662466049194336, "rewards_train/2-w": 1.246109962463379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.298213839530945, "rewards_train/margins_1": 0.493294358253479, "rewards_train/margins_2": 0.41635608673095703, "step": 51 }, { "epoch": 0.16, "learning_rate": 4.9901246065380425e-06, "loss": 0.8546, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -89.24642944335938, "logps_train/policy_1_l": -142.06130981445312, "logps_train/policy_1_w": -100.78447723388672, "logps_train/policy_2_2": -73.69862365722656, "logps_train/policy_2_w": -118.95259094238281, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.4691073000431061, "rewards_train/1-l": -0.7856231927871704, "rewards_train/1-w": 1.0477241277694702, "rewards_train/2-2": 0.6324815154075623, "rewards_train/2-w": 0.7449754476547241, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.8333473205566406, "rewards_train/margins_1": 0.5786168277263641, "rewards_train/margins_2": -0.11249393224716187, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -194.3208770751953, "logps_train/policy_1_l": -216.00914001464844, "logps_train/policy_1_w": -146.13833618164062, "logps_train/policy_2_2": -155.32025146484375, "logps_train/policy_2_w": -184.4119873046875, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6077568531036377, "rewards_train/1-l": -1.6806018352508545, "rewards_train/1-w": 1.8939785957336426, "rewards_train/2-2": 2.23594331741333, "rewards_train/2-w": 1.354114294052124, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.574580430984497, "rewards_train/margins_1": 0.2862217426300049, "rewards_train/margins_2": 0.881829023361206, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -126.3289794921875, "logps_train/policy_1_l": -158.52078247070312, "logps_train/policy_1_w": -178.74417114257812, "logps_train/policy_2_2": -109.15461730957031, "logps_train/policy_2_w": -202.52635192871094, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.010558843612671, "rewards_train/1-l": -1.0170190334320068, "rewards_train/1-w": 1.4882782697677612, "rewards_train/2-2": 1.3387378454208374, "rewards_train/2-w": 1.0528329610824585, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.505297303199768, "rewards_train/margins_1": 0.47771942615509033, "rewards_train/margins_2": 0.2859048843383789, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -106.95712280273438, "logps_train/policy_1_l": -103.90691375732422, "logps_train/policy_1_w": -86.66385650634766, "logps_train/policy_2_2": -96.0038070678711, "logps_train/policy_2_w": -100.46084594726562, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 0.8546790480613708, "rewards_train/1-l": -0.7672539949417114, "rewards_train/1-w": 1.3873744010925293, "rewards_train/2-2": 0.9668067693710327, "rewards_train/2-w": 1.1011316776275635, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1546283960342407, "rewards_train/margins_1": 0.5326953530311584, "rewards_train/margins_2": -0.13432490825653076, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -173.2310028076172, "logps_train/policy_1_l": -188.34083557128906, "logps_train/policy_1_w": -129.84591674804688, "logps_train/policy_2_2": -152.601806640625, "logps_train/policy_2_w": -150.46824645996094, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.826899528503418, "rewards_train/1-l": -1.686427116394043, "rewards_train/1-w": 1.637283444404602, "rewards_train/2-2": 2.3351333141326904, "rewards_train/2-w": 1.3906750679016113, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.323710560798645, "rewards_train/margins_1": -0.18961608409881592, "rewards_train/margins_2": 0.9444582462310791, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -137.70387268066406, "logps_train/policy_1_l": -204.48936462402344, "logps_train/policy_1_w": -106.44606018066406, "logps_train/policy_2_2": -110.40198516845703, "logps_train/policy_2_w": -128.08099365234375, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.4264873266220093, "rewards_train/1-l": -1.5098745822906494, "rewards_train/1-w": 1.1944568157196045, "rewards_train/2-2": 1.7754267454147339, "rewards_train/2-w": 0.8747127056121826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.704331398010254, "rewards_train/margins_1": -0.23203051090240479, "rewards_train/margins_2": 0.9007140398025513, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -201.92434692382812, "logps_train/policy_1_l": -200.76113891601562, "logps_train/policy_1_w": -152.05593872070312, "logps_train/policy_2_2": -175.85951232910156, "logps_train/policy_2_w": -181.83187866210938, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.4872522354125977, "rewards_train/1-l": -1.7839252948760986, "rewards_train/1-w": 1.7975306510925293, "rewards_train/2-2": 2.0890495777130127, "rewards_train/2-w": 1.16056227684021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.581455945968628, "rewards_train/margins_1": 0.31027841567993164, "rewards_train/margins_2": 0.9284873008728027, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -76.66217041015625, "logps_train/policy_1_l": -53.479225158691406, "logps_train/policy_1_w": -72.22918701171875, "logps_train/policy_2_2": -62.649559020996094, "logps_train/policy_2_w": -91.19392395019531, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.8244081735610962, "rewards_train/1-l": -0.2891331613063812, "rewards_train/1-w": 0.7580386400222778, "rewards_train/2-2": 0.9006690382957458, "rewards_train/2-w": 0.5370528101921082, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.047171801328659, "rewards_train/margins_1": -0.06636953353881836, "rewards_train/margins_2": 0.3636162281036377, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -201.74769592285156, "logps_train/policy_1_l": -244.89865112304688, "logps_train/policy_1_w": -126.27684020996094, "logps_train/policy_2_2": -168.81971740722656, "logps_train/policy_2_w": -153.27711486816406, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 2.122105360031128, "rewards_train/1-l": -1.5371317863464355, "rewards_train/1-w": 1.3512225151062012, "rewards_train/2-2": 2.5445919036865234, "rewards_train/2-w": 0.8172101378440857, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8883543014526367, "rewards_train/margins_1": -0.7708828449249268, "rewards_train/margins_2": 1.7273817658424377, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -188.24423217773438, "logps_train/policy_1_l": -266.6053161621094, "logps_train/policy_1_w": -114.69136047363281, "logps_train/policy_2_2": -152.6358184814453, "logps_train/policy_2_w": -140.7955322265625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.3255760669708252, "rewards_train/1-l": -3.4812355041503906, "rewards_train/1-w": 1.5558640956878662, "rewards_train/2-2": 1.8926928043365479, "rewards_train/2-w": 1.337634801864624, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.037099599838257, "rewards_train/margins_1": 0.23028802871704102, "rewards_train/margins_2": 0.5550580024719238, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -193.14427185058594, "logps_train/policy_1_l": -202.32821655273438, "logps_train/policy_1_w": -205.6927490234375, "logps_train/policy_2_2": -170.39352416992188, "logps_train/policy_2_w": -231.71875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 0.6937758922576904, "rewards_train/1-l": -1.6086032390594482, "rewards_train/1-w": 2.3596320152282715, "rewards_train/2-2": 1.0827181339263916, "rewards_train/2-w": 1.628906011581421, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.9682352542877197, "rewards_train/margins_1": 1.665856122970581, "rewards_train/margins_2": -0.5461878776550293, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -61.398109436035156, "logps_train/policy_1_l": -75.29924774169922, "logps_train/policy_1_w": -93.5233154296875, "logps_train/policy_2_2": -49.645751953125, "logps_train/policy_2_w": -118.24198913574219, "logps_train/ref_1_2": -64.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -56.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.27854830026626587, "rewards_train/1-l": -0.4455500841140747, "rewards_train/1-w": 1.21661376953125, "rewards_train/2-2": 0.6603273153305054, "rewards_train/2-w": 0.8789260387420654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.6621638536453247, "rewards_train/margins_1": 0.9380654692649841, "rewards_train/margins_2": -0.21859872341156006, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -169.45596313476562, "logps_train/policy_1_l": -121.96940612792969, "logps_train/policy_1_w": -125.30533599853516, "logps_train/policy_2_2": -138.08627319335938, "logps_train/policy_2_w": -158.02639770507812, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.687215268611908, "rewards_train/1-l": -0.703190803527832, "rewards_train/1-w": 1.6241542100906372, "rewards_train/2-2": 1.441373348236084, "rewards_train/2-w": 0.7536110877990723, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3273450136184692, "rewards_train/margins_1": 0.9369389414787292, "rewards_train/margins_2": 0.6877622604370117, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -141.99049377441406, "logps_train/policy_1_l": -128.43792724609375, "logps_train/policy_1_w": -71.47807312011719, "logps_train/policy_2_2": -114.986572265625, "logps_train/policy_2_w": -98.25446319580078, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.2892329692840576, "rewards_train/1-l": -0.4926207363605499, "rewards_train/1-w": 0.8896929621696472, "rewards_train/2-2": 2.001343011856079, "rewards_train/2-w": 0.38705354928970337, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3823136985301971, "rewards_train/margins_1": -0.3995400071144104, "rewards_train/margins_2": 1.6142894625663757, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -131.51376342773438, "logps_train/policy_1_l": -150.67791748046875, "logps_train/policy_1_w": -135.41297912597656, "logps_train/policy_2_2": -96.74574279785156, "logps_train/policy_2_w": -160.2897186279297, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.094717025756836, "rewards_train/1-l": -1.427165150642395, "rewards_train/1-w": 1.5024518966674805, "rewards_train/2-2": 1.5973000526428223, "rewards_train/2-w": 0.9866534471511841, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9296170473098755, "rewards_train/margins_1": 0.40773487091064453, "rewards_train/margins_2": 0.6106466054916382, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -89.830322265625, "logps_train/policy_1_l": -110.06438446044922, "logps_train/policy_1_w": -115.03822326660156, "logps_train/policy_2_2": -72.9806900024414, "logps_train/policy_2_w": -147.57089233398438, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.009155511856079, "rewards_train/1-l": -0.8997974991798401, "rewards_train/1-w": 2.207115411758423, "rewards_train/2-2": 1.1613060235977173, "rewards_train/2-w": 1.615567922592163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.106912910938263, "rewards_train/margins_1": 1.1979598999023438, "rewards_train/margins_2": -0.4542618989944458, "step": 53 }, { "epoch": 0.16, "learning_rate": 4.987810040379161e-06, "loss": 1.0117, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -127.8287353515625, "logps_train/policy_1_l": -154.0006866455078, "logps_train/policy_1_w": -129.06597900390625, "logps_train/policy_2_2": -104.85474395751953, "logps_train/policy_2_w": -161.57070922851562, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.8983771800994873, "rewards_train/1-l": -0.8889362812042236, "rewards_train/1-w": 1.5285592079162598, "rewards_train/2-2": 1.3223381042480469, "rewards_train/2-w": 1.4464443922042847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4174954891204834, "rewards_train/margins_1": 0.6301820278167725, "rewards_train/margins_2": -0.12410628795623779, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -143.9462127685547, "logps_train/policy_1_l": -118.16984558105469, "logps_train/policy_1_w": -137.4585418701172, "logps_train/policy_2_2": -114.82352447509766, "logps_train/policy_2_w": -165.38955688476562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.9600659608840942, "rewards_train/1-l": -0.5751871466636658, "rewards_train/1-w": 1.3846144676208496, "rewards_train/2-2": 1.4223345518112183, "rewards_train/2-w": 0.6610438227653503, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9598016142845154, "rewards_train/margins_1": 0.42454850673675537, "rewards_train/margins_2": 0.7612907290458679, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -136.38742065429688, "logps_train/policy_1_l": -80.64900207519531, "logps_train/policy_1_w": -97.66452026367188, "logps_train/policy_2_2": -110.3410873413086, "logps_train/policy_2_w": -126.69529724121094, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.7296169996261597, "rewards_train/1-l": -0.7539632320404053, "rewards_train/1-w": 1.4101104736328125, "rewards_train/2-2": 1.6752660274505615, "rewards_train/2-w": 0.5898449420928955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1640737056732178, "rewards_train/margins_1": 0.6804934740066528, "rewards_train/margins_2": 1.085421085357666, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -162.11456298828125, "logps_train/policy_1_l": -195.40533447265625, "logps_train/policy_1_w": -154.0493927001953, "logps_train/policy_2_2": -129.56985473632812, "logps_train/policy_2_w": -196.9410400390625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.846355676651001, "rewards_train/1-l": -1.4360407590866089, "rewards_train/1-w": 2.6489667892456055, "rewards_train/2-2": 2.4227027893066406, "rewards_train/2-w": 1.912147045135498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.085007548332214, "rewards_train/margins_1": 0.8026111125946045, "rewards_train/margins_2": 0.5105557441711426, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -124.56498718261719, "logps_train/policy_1_l": -120.96073913574219, "logps_train/policy_1_w": -119.24774932861328, "logps_train/policy_2_2": -103.77108764648438, "logps_train/policy_2_w": -142.21688842773438, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.1519970893859863, "rewards_train/1-l": -1.0733927488327026, "rewards_train/1-w": 1.5373343229293823, "rewards_train/2-2": 1.4391512870788574, "rewards_train/2-w": 0.8197164535522461, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.610727071762085, "rewards_train/margins_1": 0.385337233543396, "rewards_train/margins_2": 0.6194348335266113, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -257.5874938964844, "logps_train/policy_1_l": -186.9366455078125, "logps_train/policy_1_w": -138.26022338867188, "logps_train/policy_2_2": -210.28305053710938, "logps_train/policy_2_w": -174.2281036376953, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.38187575340271, "rewards_train/1-l": -1.6655397415161133, "rewards_train/1-w": 2.0021018981933594, "rewards_train/2-2": 2.477944850921631, "rewards_train/2-w": 1.5646898746490479, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6676416397094727, "rewards_train/margins_1": 0.6202261447906494, "rewards_train/margins_2": 0.913254976272583, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -130.89723205566406, "logps_train/policy_1_l": -119.14883422851562, "logps_train/policy_1_w": -60.593482971191406, "logps_train/policy_2_2": -113.15644073486328, "logps_train/policy_2_w": -72.87648010253906, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -76.5, "rewards_train/1-2": 1.1102774143218994, "rewards_train/1-l": -1.1016024351119995, "rewards_train/1-w": 0.5312767624855042, "rewards_train/2-2": 1.331230640411377, "rewards_train/2-w": 0.36489105224609375, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.6328791975975037, "rewards_train/margins_1": -0.5790006518363953, "rewards_train/margins_2": 0.9663395881652832, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -81.30030822753906, "logps_train/policy_1_l": -103.93208312988281, "logps_train/policy_1_w": -114.38875579833984, "logps_train/policy_2_2": -75.4468994140625, "logps_train/policy_2_w": -133.052978515625, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.8969229459762573, "rewards_train/1-l": -0.5853961706161499, "rewards_train/1-w": 1.7369056940078735, "rewards_train/2-2": 0.8275758028030396, "rewards_train/2-w": 1.3423573970794678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3223018646240234, "rewards_train/margins_1": 0.8399827480316162, "rewards_train/margins_2": -0.5147815942764282, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -171.422607421875, "logps_train/policy_1_l": -219.46356201171875, "logps_train/policy_1_w": -196.2461700439453, "logps_train/policy_2_2": -145.45718383789062, "logps_train/policy_2_w": -224.83668518066406, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.323364496231079, "rewards_train/1-l": -2.3089065551757812, "rewards_train/1-w": 2.3206958770751953, "rewards_train/2-2": 1.8870936632156372, "rewards_train/2-w": 1.5452375411987305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.629602432250977, "rewards_train/margins_1": 0.9973313808441162, "rewards_train/margins_2": 0.34185612201690674, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -206.43101501464844, "logps_train/policy_1_l": -227.6201171875, "logps_train/policy_1_w": -173.8731231689453, "logps_train/policy_2_2": -163.52101135253906, "logps_train/policy_2_w": -217.64468383789062, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.0989885330200195, "rewards_train/1-l": -2.3065433502197266, "rewards_train/1-w": 2.3376879692077637, "rewards_train/2-2": 2.213718891143799, "rewards_train/2-w": 1.2605316638946533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.64423131942749, "rewards_train/margins_1": 1.2386994361877441, "rewards_train/margins_2": 0.9531872272491455, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -150.61888122558594, "logps_train/policy_1_l": -122.5296630859375, "logps_train/policy_1_w": -187.3699951171875, "logps_train/policy_2_2": -124.3585205078125, "logps_train/policy_2_w": -238.14865112304688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.5295177698135376, "rewards_train/1-l": -0.758434534072876, "rewards_train/1-w": 3.0632944107055664, "rewards_train/2-2": 1.821960210800171, "rewards_train/2-w": 1.6757588386535645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.8217289447784424, "rewards_train/margins_1": 1.5337766408920288, "rewards_train/margins_2": 0.14620137214660645, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -81.83432006835938, "logps_train/policy_1_l": -43.53652572631836, "logps_train/policy_1_w": -52.16108322143555, "logps_train/policy_2_2": -68.41514587402344, "logps_train/policy_2_w": -65.13699340820312, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -40.75, "logps_train/ref_1_w": -62.5, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -73.0, "rewards_train/1-2": 0.679800271987915, "rewards_train/1-l": -0.28407251834869385, "rewards_train/1-w": 1.0226612091064453, "rewards_train/2-2": 0.9059957265853882, "rewards_train/2-w": 0.7862033843994141, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.3067337274551392, "rewards_train/margins_1": 0.3428609371185303, "rewards_train/margins_2": 0.11979234218597412, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -118.88135528564453, "logps_train/policy_1_l": -94.65467834472656, "logps_train/policy_1_w": -80.14805603027344, "logps_train/policy_2_2": -91.57555389404297, "logps_train/policy_2_w": -106.02894592285156, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.8165522813796997, "rewards_train/1-l": -0.24713310599327087, "rewards_train/1-w": 1.0787237882614136, "rewards_train/2-2": 1.0436164140701294, "rewards_train/2-w": 0.4991569519042969, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3258568942546844, "rewards_train/margins_1": 0.26217150688171387, "rewards_train/margins_2": 0.5444594621658325, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -172.1224365234375, "logps_train/policy_1_l": -151.6197509765625, "logps_train/policy_1_w": -113.65919494628906, "logps_train/policy_2_2": -137.91912841796875, "logps_train/policy_2_w": -147.62039184570312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.5690052509307861, "rewards_train/1-l": -1.6469346284866333, "rewards_train/1-w": 1.956345796585083, "rewards_train/2-2": 1.1963680982589722, "rewards_train/2-w": 1.4219452142715454, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.6032804250717163, "rewards_train/margins_1": 1.3873405456542969, "rewards_train/margins_2": -0.22557711601257324, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -143.15963745117188, "logps_train/policy_1_l": -100.74224853515625, "logps_train/policy_1_w": -69.34225463867188, "logps_train/policy_2_2": -118.32608032226562, "logps_train/policy_2_w": -82.96849822998047, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.4480990469455719, "rewards_train/1-l": -0.6109434962272644, "rewards_train/1-w": 0.7032743096351624, "rewards_train/2-2": 1.254892110824585, "rewards_train/2-w": 0.5554940104484558, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3142178058624268, "rewards_train/margins_1": 0.25517526268959045, "rewards_train/margins_2": 0.6993981003761292, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -266.9764404296875, "logps_train/policy_1_l": -194.2850341796875, "logps_train/policy_1_w": -157.51470947265625, "logps_train/policy_2_2": -224.67050170898438, "logps_train/policy_2_w": -190.94692993164062, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.947669506072998, "rewards_train/1-l": 0.16290263831615448, "rewards_train/1-w": 1.6563432216644287, "rewards_train/2-2": 3.030606746673584, "rewards_train/2-w": 1.145930290222168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.4934405833482742, "rewards_train/margins_1": -0.29132628440856934, "rewards_train/margins_2": 1.884676456451416, "step": 55 }, { "epoch": 0.17, "learning_rate": 4.985252668346077e-06, "loss": 0.9425, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -192.33497619628906, "logps_train/policy_1_l": -239.84446716308594, "logps_train/policy_1_w": -174.35171508789062, "logps_train/policy_2_2": -159.6351318359375, "logps_train/policy_2_w": -206.45443725585938, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.7180655002593994, "rewards_train/1-l": -2.9930405616760254, "rewards_train/1-w": 2.5031094551086426, "rewards_train/2-2": 2.4005494117736816, "rewards_train/2-w": 1.8983064889907837, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.496150016784668, "rewards_train/margins_1": 0.7850439548492432, "rewards_train/margins_2": 0.502242922782898, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -158.0826416015625, "logps_train/policy_1_l": -140.09320068359375, "logps_train/policy_1_w": -137.38320922851562, "logps_train/policy_2_2": -129.48434448242188, "logps_train/policy_2_w": -170.531005859375, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.9511111378669739, "rewards_train/1-l": -1.2542426586151123, "rewards_train/1-w": 1.7697851657867432, "rewards_train/2-2": 1.6125025749206543, "rewards_train/2-w": 1.1062740087509155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0240278244018555, "rewards_train/margins_1": 0.8186740279197693, "rewards_train/margins_2": 0.5062285661697388, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -124.45247650146484, "logps_train/policy_1_l": -87.83184814453125, "logps_train/policy_1_w": -118.32440185546875, "logps_train/policy_2_2": -111.29962158203125, "logps_train/policy_2_w": -139.63845825195312, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.120377540588379, "rewards_train/1-l": -0.509991466999054, "rewards_train/1-w": 1.2784974575042725, "rewards_train/2-2": 1.3012876510620117, "rewards_train/2-w": 0.9236552119255066, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7884889245033264, "rewards_train/margins_1": 0.15811991691589355, "rewards_train/margins_2": 0.3776324391365051, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -87.92390441894531, "logps_train/policy_1_l": -90.1343002319336, "logps_train/policy_1_w": -86.74406433105469, "logps_train/policy_2_2": -78.02981567382812, "logps_train/policy_2_w": -103.4608154296875, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 0.9013601541519165, "rewards_train/1-l": -0.5251492261886597, "rewards_train/1-w": 1.0646557807922363, "rewards_train/2-2": 1.0493625402450562, "rewards_train/2-w": 0.9882937073707581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.589805006980896, "rewards_train/margins_1": 0.16329562664031982, "rewards_train/margins_2": 0.061068832874298096, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -191.00881958007812, "logps_train/policy_1_l": -270.7413330078125, "logps_train/policy_1_w": -191.07078552246094, "logps_train/policy_2_2": -167.33090209960938, "logps_train/policy_2_w": -237.96475219726562, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -255.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.3069298267364502, "rewards_train/1-l": -1.5960054397583008, "rewards_train/1-w": 2.1116716861724854, "rewards_train/2-2": 1.9106590747833252, "rewards_train/2-w": 1.4472745656967163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.707677125930786, "rewards_train/margins_1": 0.8047418594360352, "rewards_train/margins_2": 0.4633845090866089, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -96.62051391601562, "logps_train/policy_1_l": -67.80168151855469, "logps_train/policy_1_w": -76.67107391357422, "logps_train/policy_2_2": -75.76268005371094, "logps_train/policy_2_w": -94.89756774902344, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 1.067636251449585, "rewards_train/1-l": -0.31786373257637024, "rewards_train/1-w": 1.786017894744873, "rewards_train/2-2": 1.516505479812622, "rewards_train/2-w": 1.423524260520935, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1038816273212433, "rewards_train/margins_1": 0.7183816432952881, "rewards_train/margins_2": 0.09298121929168701, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -152.93067932128906, "logps_train/policy_1_l": -147.42750549316406, "logps_train/policy_1_w": -195.312255859375, "logps_train/policy_2_2": -127.95521545410156, "logps_train/policy_2_w": -239.644287109375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 1.3100574016571045, "rewards_train/1-l": -0.8929459452629089, "rewards_train/1-w": 2.8066654205322266, "rewards_train/2-2": 1.6107275485992432, "rewards_train/2-w": 1.564479112625122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6996113657951355, "rewards_train/margins_1": 1.496608018875122, "rewards_train/margins_2": 0.046248435974121094, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -177.76986694335938, "logps_train/policy_1_l": -260.1331787109375, "logps_train/policy_1_w": -179.4618682861328, "logps_train/policy_2_2": -153.98797607421875, "logps_train/policy_2_w": -233.5979766845703, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.7355140447616577, "rewards_train/1-l": -1.2945674657821655, "rewards_train/1-w": 2.1975626945495605, "rewards_train/2-2": 1.9949524402618408, "rewards_train/2-w": 1.3027029037475586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.492130160331726, "rewards_train/margins_1": 0.46204864978790283, "rewards_train/margins_2": 0.6922495365142822, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -113.03034973144531, "logps_train/policy_1_l": -126.7420654296875, "logps_train/policy_1_w": -115.25155639648438, "logps_train/policy_2_2": -88.2877197265625, "logps_train/policy_2_w": -147.97052001953125, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.313371181488037, "rewards_train/1-l": -1.0021356344223022, "rewards_train/1-w": 1.5228912830352783, "rewards_train/2-2": 1.704040765762329, "rewards_train/2-w": 0.6302919983863831, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.5250269174575806, "rewards_train/margins_1": 0.2095201015472412, "rewards_train/margins_2": 1.073748767375946, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -132.9119873046875, "logps_train/policy_1_l": -173.64605712890625, "logps_train/policy_1_w": -134.3288116455078, "logps_train/policy_2_2": -110.6965560913086, "logps_train/policy_2_w": -160.94839477539062, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.2322375774383545, "rewards_train/1-l": -1.4604076147079468, "rewards_train/1-w": 2.2280566692352295, "rewards_train/2-2": 1.5631574392318726, "rewards_train/2-w": 1.2995944023132324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6884642839431763, "rewards_train/margins_1": 0.995819091796875, "rewards_train/margins_2": 0.26356303691864014, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -206.31790161132812, "logps_train/policy_1_l": -182.73802185058594, "logps_train/policy_1_w": -131.35914611816406, "logps_train/policy_2_2": -167.63116455078125, "logps_train/policy_2_w": -165.10006713867188, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.3994590044021606, "rewards_train/1-l": -1.0109107494354248, "rewards_train/1-w": 1.920334815979004, "rewards_train/2-2": 2.011883020401001, "rewards_train/2-w": 1.399366855621338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9312455654144287, "rewards_train/margins_1": 0.5208758115768433, "rewards_train/margins_2": 0.6125161647796631, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -129.59906005859375, "logps_train/policy_1_l": -128.37875366210938, "logps_train/policy_1_w": -80.34756469726562, "logps_train/policy_2_2": -99.328369140625, "logps_train/policy_2_w": -110.09942626953125, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.34712573885917664, "rewards_train/1-l": -0.48709484934806824, "rewards_train/1-w": 1.0875091552734375, "rewards_train/2-2": 0.6402105689048767, "rewards_train/2-w": 0.4994319677352905, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.5746040046215057, "rewards_train/margins_1": 0.7403834164142609, "rewards_train/margins_2": 0.14077860116958618, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -193.31137084960938, "logps_train/policy_1_l": -310.18975830078125, "logps_train/policy_1_w": -186.82437133789062, "logps_train/policy_2_2": -149.48074340820312, "logps_train/policy_2_w": -225.9485321044922, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -278.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 0.8532383441925049, "rewards_train/1-l": -3.2783493995666504, "rewards_train/1-w": 2.6394364833831787, "rewards_train/2-2": 1.5245810747146606, "rewards_train/2-w": 1.6535835266113281, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.917785882949829, "rewards_train/margins_1": 1.7861981391906738, "rewards_train/margins_2": -0.12900245189666748, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -206.33738708496094, "logps_train/policy_1_l": -194.0814208984375, "logps_train/policy_1_w": -186.01028442382812, "logps_train/policy_2_2": -182.22463989257812, "logps_train/policy_2_w": -211.40151977539062, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.586573600769043, "rewards_train/1-l": -1.9464234113693237, "rewards_train/1-w": 1.6950656175613403, "rewards_train/2-2": 2.188474178314209, "rewards_train/2-w": 1.0754724740982056, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.641489028930664, "rewards_train/margins_1": 0.10849201679229736, "rewards_train/margins_2": 1.1130017042160034, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -148.02867126464844, "logps_train/policy_1_l": -161.37786865234375, "logps_train/policy_1_w": -122.47877502441406, "logps_train/policy_2_2": -120.02925109863281, "logps_train/policy_2_w": -161.43023681640625, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.7611474990844727, "rewards_train/1-l": -1.4854438304901123, "rewards_train/1-w": 2.436107635498047, "rewards_train/2-2": 2.4664602279663086, "rewards_train/2-w": 1.43705415725708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.921551465988159, "rewards_train/margins_1": 0.6749601364135742, "rewards_train/margins_2": 1.0294060707092285, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -160.12652587890625, "logps_train/policy_1_l": -203.4713134765625, "logps_train/policy_1_w": -147.43804931640625, "logps_train/policy_2_2": -142.5264892578125, "logps_train/policy_2_w": -168.44265747070312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0857853889465332, "rewards_train/1-l": -2.0240845680236816, "rewards_train/1-w": 1.3377869129180908, "rewards_train/2-2": 1.253600835800171, "rewards_train/2-w": 1.0299538373947144, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3618714809417725, "rewards_train/margins_1": 0.2520015239715576, "rewards_train/margins_2": 0.22364699840545654, "step": 57 }, { "epoch": 0.17, "learning_rate": 4.982452740033793e-06, "loss": 0.8876, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -113.67758178710938, "logps_train/policy_1_l": -78.74237060546875, "logps_train/policy_1_w": -52.64436721801758, "logps_train/policy_2_2": -102.92324829101562, "logps_train/policy_2_w": -63.0177001953125, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -58.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -65.0, "rewards_train/1-2": 1.0322412252426147, "rewards_train/1-l": -0.7076348662376404, "rewards_train/1-w": 0.5312663912773132, "rewards_train/2-2": 1.1158783435821533, "rewards_train/2-w": 0.18573005497455597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.2389012575149536, "rewards_train/margins_1": -0.5009748339653015, "rewards_train/margins_2": 0.9301482886075974, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -124.25105285644531, "logps_train/policy_1_l": -133.2725372314453, "logps_train/policy_1_w": -147.48919677734375, "logps_train/policy_2_2": -101.44300842285156, "logps_train/policy_2_w": -180.89013671875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.6389575004577637, "rewards_train/1-l": -0.29600420594215393, "rewards_train/1-w": 1.335455298423767, "rewards_train/2-2": 0.9103865027427673, "rewards_train/2-w": 0.4609856605529785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.631459504365921, "rewards_train/margins_1": 0.6964977979660034, "rewards_train/margins_2": 0.4494008421897888, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -189.60638427734375, "logps_train/policy_1_l": -122.79813385009766, "logps_train/policy_1_w": -137.96563720703125, "logps_train/policy_2_2": -171.3369903564453, "logps_train/policy_2_w": -166.7876434326172, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5346739292144775, "rewards_train/1-l": -0.96506667137146, "rewards_train/1-w": 1.8409371376037598, "rewards_train/2-2": 2.0498945713043213, "rewards_train/2-w": 1.1806113719940186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8060038089752197, "rewards_train/margins_1": 0.3062632083892822, "rewards_train/margins_2": 0.8692831993103027, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -96.27359771728516, "logps_train/policy_1_l": -86.9388656616211, "logps_train/policy_1_w": -87.02310180664062, "logps_train/policy_2_2": -81.17604064941406, "logps_train/policy_2_w": -108.765869140625, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 0.6554524898529053, "rewards_train/1-l": -0.4513084888458252, "rewards_train/1-w": 0.8601893186569214, "rewards_train/2-2": 0.7405992150306702, "rewards_train/2-w": 0.5546630024909973, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.3114978075027466, "rewards_train/margins_1": 0.2047368288040161, "rewards_train/margins_2": 0.18593621253967285, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -144.56260681152344, "logps_train/policy_1_l": -102.5853500366211, "logps_train/policy_1_w": -139.21466064453125, "logps_train/policy_2_2": -122.99655151367188, "logps_train/policy_2_w": -168.83815002441406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1070210933685303, "rewards_train/1-l": -0.9738187789916992, "rewards_train/1-w": 1.73634672164917, "rewards_train/2-2": 1.4265172481536865, "rewards_train/2-w": 1.087279200553894, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.710165500640869, "rewards_train/margins_1": 0.6293256282806396, "rewards_train/margins_2": 0.3392380475997925, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -203.88278198242188, "logps_train/policy_1_l": -269.54351806640625, "logps_train/policy_1_w": -121.62551879882812, "logps_train/policy_2_2": -168.78762817382812, "logps_train/policy_2_w": -150.7681884765625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 2.017970561981201, "rewards_train/1-l": -2.612164258956909, "rewards_train/1-w": 1.7222139835357666, "rewards_train/2-2": 2.968111515045166, "rewards_train/2-w": 1.0821647644042969, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.334378242492676, "rewards_train/margins_1": -0.29575657844543457, "rewards_train/margins_2": 1.8859467506408691, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -118.17552185058594, "logps_train/policy_1_l": -104.38773345947266, "logps_train/policy_1_w": -108.98857879638672, "logps_train/policy_2_2": -101.10420227050781, "logps_train/policy_2_w": -124.60609436035156, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.8109631538391113, "rewards_train/1-l": -0.9676799774169922, "rewards_train/1-w": 1.2355167865753174, "rewards_train/2-2": 1.3161423206329346, "rewards_train/2-w": 0.9597029089927673, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2031967639923096, "rewards_train/margins_1": 0.42455363273620605, "rewards_train/margins_2": 0.35643941164016724, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -138.99551391601562, "logps_train/policy_1_l": -163.04278564453125, "logps_train/policy_1_w": -141.84747314453125, "logps_train/policy_2_2": -114.71289825439453, "logps_train/policy_2_w": -187.05615234375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.5871665477752686, "rewards_train/1-l": -0.9003727436065674, "rewards_train/1-w": 2.2965028285980225, "rewards_train/2-2": 2.107225179672241, "rewards_train/2-w": 1.5717281103134155, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.19687557220459, "rewards_train/margins_1": 0.7093362808227539, "rewards_train/margins_2": 0.5354970693588257, "step": 58 }, { "epoch": 0.18, "logps_train/policy_1_2": -150.05038452148438, "logps_train/policy_1_l": -196.18887329101562, "logps_train/policy_1_w": -124.48596954345703, "logps_train/policy_2_2": -122.20155334472656, "logps_train/policy_2_w": -162.890380859375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.885587215423584, "rewards_train/1-l": -1.371232271194458, "rewards_train/1-w": 1.5264031887054443, "rewards_train/2-2": 1.3798446655273438, "rewards_train/2-w": 0.6547123789787292, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8976354598999023, "rewards_train/margins_1": 0.6408159732818604, "rewards_train/margins_2": 0.7251322865486145, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -130.21807861328125, "logps_train/policy_1_l": -153.31085205078125, "logps_train/policy_1_w": -86.82867431640625, "logps_train/policy_2_2": -107.5489730834961, "logps_train/policy_2_w": -113.31309509277344, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.7828794717788696, "rewards_train/1-l": -1.4187800884246826, "rewards_train/1-w": 1.385101556777954, "rewards_train/2-2": 1.1036970615386963, "rewards_train/2-w": 0.7874404191970825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8038816452026367, "rewards_train/margins_1": 0.6022220849990845, "rewards_train/margins_2": 0.31625664234161377, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -165.3185577392578, "logps_train/policy_1_l": -221.88734436035156, "logps_train/policy_1_w": -145.46044921875, "logps_train/policy_2_2": -138.03842163085938, "logps_train/policy_2_w": -179.9993133544922, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.0931439399719238, "rewards_train/1-l": -2.8231101036071777, "rewards_train/1-w": 2.0094237327575684, "rewards_train/2-2": 1.5742815732955933, "rewards_train/2-w": 1.167256474494934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.832533836364746, "rewards_train/margins_1": 0.9162797927856445, "rewards_train/margins_2": 0.4070250988006592, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -121.01150512695312, "logps_train/policy_1_l": -123.60966491699219, "logps_train/policy_1_w": -83.43624877929688, "logps_train/policy_2_2": -96.93460083007812, "logps_train/policy_2_w": -103.56085205078125, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.8230680227279663, "rewards_train/1-l": -0.6545211672782898, "rewards_train/1-w": 1.1204373836517334, "rewards_train/2-2": 1.32216477394104, "rewards_train/2-w": 0.8954777121543884, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7749585509300232, "rewards_train/margins_1": 0.2973693609237671, "rewards_train/margins_2": 0.4266870617866516, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -149.271484375, "logps_train/policy_1_l": -101.5633544921875, "logps_train/policy_1_w": -72.4637451171875, "logps_train/policy_2_2": -115.5052261352539, "logps_train/policy_2_w": -91.70156860351562, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 2.079102039337158, "rewards_train/1-l": -1.7548702955245972, "rewards_train/1-w": 1.179015874862671, "rewards_train/2-2": 2.769789218902588, "rewards_train/2-w": 0.6173427104949951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.933886170387268, "rewards_train/margins_1": -0.9000861644744873, "rewards_train/margins_2": 2.1524465084075928, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -131.26165771484375, "logps_train/policy_1_l": -167.5311279296875, "logps_train/policy_1_w": -134.4400177001953, "logps_train/policy_2_2": -114.71556091308594, "logps_train/policy_2_w": -156.08880615234375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.093364953994751, "rewards_train/1-l": -1.0992053747177124, "rewards_train/1-w": 1.276310920715332, "rewards_train/2-2": 1.4663350582122803, "rewards_train/2-w": 0.9926807880401611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3755162954330444, "rewards_train/margins_1": 0.18294596672058105, "rewards_train/margins_2": 0.47365427017211914, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -149.57424926757812, "logps_train/policy_1_l": -103.6705093383789, "logps_train/policy_1_w": -77.59310913085938, "logps_train/policy_2_2": -115.3271484375, "logps_train/policy_2_w": -111.56316375732422, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.0527311563491821, "rewards_train/1-l": -0.7358250021934509, "rewards_train/1-w": 1.2461578845977783, "rewards_train/2-2": 2.044628858566284, "rewards_train/2-w": 0.7007144093513489, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9819828867912292, "rewards_train/margins_1": 0.1934267282485962, "rewards_train/margins_2": 1.3439144492149353, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -143.6812744140625, "logps_train/policy_1_l": -100.51942443847656, "logps_train/policy_1_w": -194.4471435546875, "logps_train/policy_2_2": -124.6064453125, "logps_train/policy_2_w": -230.50863647460938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.3771862983703613, "rewards_train/1-l": -0.5839733481407166, "rewards_train/1-w": 2.427161693572998, "rewards_train/2-2": 1.6768547296524048, "rewards_train/2-w": 1.5303863286972046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0111350417137146, "rewards_train/margins_1": 1.0499753952026367, "rewards_train/margins_2": 0.1464684009552002, "step": 59 }, { "epoch": 0.18, "learning_rate": 4.979410528710376e-06, "loss": 1.0493, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -201.6337890625, "logps_train/policy_1_l": -160.51406860351562, "logps_train/policy_1_w": -112.09452819824219, "logps_train/policy_2_2": -173.2309112548828, "logps_train/policy_2_w": -134.46923828125, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.8053717613220215, "rewards_train/1-l": -1.4955480098724365, "rewards_train/1-w": 1.9007031917572021, "rewards_train/2-2": 2.5319864749908447, "rewards_train/2-w": 1.485107183456421, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3962512016296387, "rewards_train/margins_1": 0.09533143043518066, "rewards_train/margins_2": 1.0468792915344238, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -136.73477172851562, "logps_train/policy_1_l": -111.95514678955078, "logps_train/policy_1_w": -117.42208862304688, "logps_train/policy_2_2": -115.54596710205078, "logps_train/policy_2_w": -149.52166748046875, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.362461805343628, "rewards_train/1-l": -0.5956119298934937, "rewards_train/1-w": 2.082791328430176, "rewards_train/2-2": 1.6477468013763428, "rewards_train/2-w": 1.1462706327438354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6784032583236694, "rewards_train/margins_1": 0.7203295230865479, "rewards_train/margins_2": 0.5014761686325073, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -177.03375244140625, "logps_train/policy_1_l": -250.74267578125, "logps_train/policy_1_w": -143.1997833251953, "logps_train/policy_2_2": -148.11781311035156, "logps_train/policy_2_w": -179.74969482421875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.1231861114501953, "rewards_train/1-l": -1.904736042022705, "rewards_train/1-w": 1.856584072113037, "rewards_train/2-2": 2.474155902862549, "rewards_train/2-w": 0.7750294208526611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.761320114135742, "rewards_train/margins_1": -0.2666020393371582, "rewards_train/margins_2": 1.6991264820098877, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -225.32986450195312, "logps_train/policy_1_l": -221.34207153320312, "logps_train/policy_1_w": -222.48858642578125, "logps_train/policy_2_2": -191.0751495361328, "logps_train/policy_2_w": -279.6106872558594, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 0.8668180704116821, "rewards_train/1-l": -1.8740507364273071, "rewards_train/1-w": 2.1566085815429688, "rewards_train/2-2": 1.3707078695297241, "rewards_train/2-w": 0.9561194181442261, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.030659317970276, "rewards_train/margins_1": 1.2897905111312866, "rewards_train/margins_2": 0.41458845138549805, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -37.32872772216797, "logps_train/policy_1_l": -46.210445404052734, "logps_train/policy_1_w": -60.30079650878906, "logps_train/policy_2_2": -27.83493423461914, "logps_train/policy_2_w": -82.36607360839844, "logps_train/ref_1_2": -38.0, "logps_train/ref_1_l": -41.75, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -30.75, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 0.055017657577991486, "rewards_train/1-l": -0.44501885771751404, "rewards_train/1-w": 1.5026352405548096, "rewards_train/2-2": 0.28720980882644653, "rewards_train/2-w": 1.130579948425293, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9476540982723236, "rewards_train/margins_1": 1.447617582976818, "rewards_train/margins_2": -0.8433701395988464, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -91.43057250976562, "logps_train/policy_1_l": -115.7500228881836, "logps_train/policy_1_w": -96.504150390625, "logps_train/policy_2_2": -77.35002136230469, "logps_train/policy_2_w": -112.97801208496094, "logps_train/ref_1_2": -94.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.3206154704093933, "rewards_train/1-l": -1.2501966953277588, "rewards_train/1-w": 1.9019293785095215, "rewards_train/2-2": 0.88311368227005, "rewards_train/2-w": 1.6287612915039062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 3.1521260738372803, "rewards_train/margins_1": 1.5813139081001282, "rewards_train/margins_2": -0.7456476092338562, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -190.80368041992188, "logps_train/policy_1_l": -184.4888916015625, "logps_train/policy_1_w": -132.88316345214844, "logps_train/policy_2_2": -161.22073364257812, "logps_train/policy_2_w": -163.81488037109375, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.767092227935791, "rewards_train/1-l": -1.2508418560028076, "rewards_train/1-w": 2.3351218700408936, "rewards_train/2-2": 2.338083028793335, "rewards_train/2-w": 1.609136700630188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.585963726043701, "rewards_train/margins_1": 0.5680296421051025, "rewards_train/margins_2": 0.728946328163147, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -65.24860382080078, "logps_train/policy_1_l": -54.60050964355469, "logps_train/policy_1_w": -62.856056213378906, "logps_train/policy_2_2": -50.92231750488281, "logps_train/policy_2_w": -85.19090270996094, "logps_train/ref_1_2": -73.0, "logps_train/ref_1_l": -47.0, "logps_train/ref_1_w": -72.5, "logps_train/ref_2_2": -60.25, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.7724051475524902, "rewards_train/1-l": -0.7633713483810425, "rewards_train/1-w": 0.9565816521644592, "rewards_train/2-2": 0.9343311190605164, "rewards_train/2-w": 0.3840356767177582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7199530005455017, "rewards_train/margins_1": 0.184176504611969, "rewards_train/margins_2": 0.5502954423427582, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -215.68161010742188, "logps_train/policy_1_l": -156.1849365234375, "logps_train/policy_1_w": -129.56414794921875, "logps_train/policy_2_2": -176.45953369140625, "logps_train/policy_2_w": -164.3922576904297, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.4599637985229492, "rewards_train/1-l": -1.2329480648040771, "rewards_train/1-w": 1.7068674564361572, "rewards_train/2-2": 2.1993587017059326, "rewards_train/2-w": 0.595149040222168, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9398155212402344, "rewards_train/margins_1": 0.246903657913208, "rewards_train/margins_2": 1.6042096614837646, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -132.6901092529297, "logps_train/policy_1_l": -142.5127716064453, "logps_train/policy_1_w": -103.36039733886719, "logps_train/policy_2_2": -106.04415893554688, "logps_train/policy_2_w": -133.8043212890625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.125519871711731, "rewards_train/1-l": -1.0896568298339844, "rewards_train/1-w": 0.872162938117981, "rewards_train/2-2": 1.6854274272918701, "rewards_train/2-w": 0.5894904136657715, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9618197679519653, "rewards_train/margins_1": -0.25335693359375, "rewards_train/margins_2": 1.0959370136260986, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -240.20884704589844, "logps_train/policy_1_l": -250.9680633544922, "logps_train/policy_1_w": -155.6778564453125, "logps_train/policy_2_2": -204.04473876953125, "logps_train/policy_2_w": -191.30941772460938, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.4025521278381348, "rewards_train/1-l": -2.0514936447143555, "rewards_train/1-w": 1.4478390216827393, "rewards_train/2-2": 2.681462287902832, "rewards_train/2-w": 1.2549943923950195, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4993326663970947, "rewards_train/margins_1": 0.04528689384460449, "rewards_train/margins_2": 1.4264678955078125, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -145.14105224609375, "logps_train/policy_1_l": -157.15135192871094, "logps_train/policy_1_w": -128.07171630859375, "logps_train/policy_2_2": -122.8601303100586, "logps_train/policy_2_w": -153.99307250976562, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0280810594558716, "rewards_train/1-l": -0.6737291216850281, "rewards_train/1-w": 1.058453917503357, "rewards_train/2-2": 1.5639867782592773, "rewards_train/2-w": 0.47256892919540405, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.732183039188385, "rewards_train/margins_1": 0.03037285804748535, "rewards_train/margins_2": 1.0914178490638733, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -150.84266662597656, "logps_train/policy_1_l": -154.48989868164062, "logps_train/policy_1_w": -156.0239715576172, "logps_train/policy_2_2": -134.00360107421875, "logps_train/policy_2_w": -187.42745971679688, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.1840927600860596, "rewards_train/1-l": -1.29820716381073, "rewards_train/1-w": 2.2304153442382812, "rewards_train/2-2": 1.3029612302780151, "rewards_train/2-w": 1.3361597061157227, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5286225080490112, "rewards_train/margins_1": 1.0463225841522217, "rewards_train/margins_2": -0.03319847583770752, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -142.2001495361328, "logps_train/policy_1_l": -110.96762084960938, "logps_train/policy_1_w": -103.60504150390625, "logps_train/policy_2_2": -115.9582748413086, "logps_train/policy_2_w": -124.52301788330078, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.6330127716064453, "rewards_train/1-l": -1.2431732416152954, "rewards_train/1-w": 1.9838067293167114, "rewards_train/2-2": 2.3064181804656982, "rewards_train/2-w": 1.3857840299606323, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.226979970932007, "rewards_train/margins_1": 0.3507939577102661, "rewards_train/margins_2": 0.9206341505050659, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -145.0908966064453, "logps_train/policy_1_l": -145.51101684570312, "logps_train/policy_1_w": -100.99320983886719, "logps_train/policy_2_2": -124.16106414794922, "logps_train/policy_2_w": -114.89254760742188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.665910005569458, "rewards_train/1-l": -1.2220004796981812, "rewards_train/1-w": 0.9807578325271606, "rewards_train/2-2": 1.083503007888794, "rewards_train/2-w": 0.7982450723648071, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.202758312225342, "rewards_train/margins_1": 0.31484782695770264, "rewards_train/margins_2": 0.2852579355239868, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -182.30215454101562, "logps_train/policy_1_l": -141.78651428222656, "logps_train/policy_1_w": -167.83950805664062, "logps_train/policy_2_2": -144.98204040527344, "logps_train/policy_2_w": -207.07034301757812, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.5247166156768799, "rewards_train/1-l": -1.0849016904830933, "rewards_train/1-w": 1.886996865272522, "rewards_train/2-2": 2.220546245574951, "rewards_train/2-w": 1.0285115242004395, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9718985557556152, "rewards_train/margins_1": 0.3622802495956421, "rewards_train/margins_2": 1.1920347213745117, "step": 61 }, { "epoch": 0.19, "learning_rate": 4.97612633129029e-06, "loss": 0.9305, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -165.81275939941406, "logps_train/policy_1_l": -103.22632598876953, "logps_train/policy_1_w": -118.2506103515625, "logps_train/policy_2_2": -131.86331176757812, "logps_train/policy_2_w": -148.57025146484375, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.3030991554260254, "rewards_train/1-l": -0.9287357926368713, "rewards_train/1-w": 2.2061891555786133, "rewards_train/2-2": 1.9609344005584717, "rewards_train/2-w": 1.3003956079483032, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1349249482154846, "rewards_train/margins_1": 0.9030900001525879, "rewards_train/margins_2": 0.6605387926101685, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -276.7970886230469, "logps_train/policy_1_l": -262.43267822265625, "logps_train/policy_1_w": -151.73068237304688, "logps_train/policy_2_2": -234.95639038085938, "logps_train/policy_2_w": -189.98272705078125, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -268.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.8921676874160767, "rewards_train/1-l": -2.102644443511963, "rewards_train/1-w": 2.176931381225586, "rewards_train/2-2": 3.1574854850769043, "rewards_train/2-w": 1.7017279863357544, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.279575824737549, "rewards_train/margins_1": 0.2847636938095093, "rewards_train/margins_2": 1.45575749874115, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -169.25607299804688, "logps_train/policy_1_l": -215.6458740234375, "logps_train/policy_1_w": -206.11380004882812, "logps_train/policy_2_2": -147.39126586914062, "logps_train/policy_2_w": -238.85545349121094, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.132204294204712, "rewards_train/1-l": -1.3278694152832031, "rewards_train/1-w": 2.3511195182800293, "rewards_train/2-2": 1.6718116998672485, "rewards_train/2-w": 1.551954746246338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6789889335632324, "rewards_train/margins_1": 1.2189152240753174, "rewards_train/margins_2": 0.11985695362091064, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -204.27053833007812, "logps_train/policy_1_l": -184.10975646972656, "logps_train/policy_1_w": -136.4430389404297, "logps_train/policy_2_2": -181.76834106445312, "logps_train/policy_2_w": -154.54469299316406, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.0041966438293457, "rewards_train/1-l": -1.3449599742889404, "rewards_train/1-w": 1.593196153640747, "rewards_train/2-2": 2.4825408458709717, "rewards_train/2-w": 1.1892807483673096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9381561279296875, "rewards_train/margins_1": -0.41100049018859863, "rewards_train/margins_2": 1.293260097503662, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -109.49544525146484, "logps_train/policy_1_l": -88.91944885253906, "logps_train/policy_1_w": -114.99987030029297, "logps_train/policy_2_2": -101.05805969238281, "logps_train/policy_2_w": -129.08404541015625, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.3973308801651, "rewards_train/1-l": -0.4966330826282501, "rewards_train/1-w": 1.5553357601165771, "rewards_train/2-2": 1.498100757598877, "rewards_train/2-w": 1.3470646142959595, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0519688427448273, "rewards_train/margins_1": 0.15800487995147705, "rewards_train/margins_2": 0.15103614330291748, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -79.66050720214844, "logps_train/policy_1_l": -85.55429077148438, "logps_train/policy_1_w": -41.93171691894531, "logps_train/policy_2_2": -60.278446197509766, "logps_train/policy_2_w": -53.03154754638672, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -50.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -60.5, "rewards_train/1-2": 0.7757459878921509, "rewards_train/1-l": 0.20199280977249146, "rewards_train/1-w": 0.8693285584449768, "rewards_train/2-2": 1.3893427848815918, "rewards_train/2-w": 0.7452830076217651, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.6673357486724854, "rewards_train/margins_1": 0.09358257055282593, "rewards_train/margins_2": 0.6440597772598267, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -179.121826171875, "logps_train/policy_1_l": -200.50543212890625, "logps_train/policy_1_w": -139.46670532226562, "logps_train/policy_2_2": -152.30224609375, "logps_train/policy_2_w": -160.8082275390625, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.5878174304962158, "rewards_train/1-l": -1.5880428552627563, "rewards_train/1-w": 1.8127052783966064, "rewards_train/2-2": 2.10727596282959, "rewards_train/2-w": 1.6223011016845703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.400748133659363, "rewards_train/margins_1": 0.22488784790039062, "rewards_train/margins_2": 0.48497486114501953, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -99.38130187988281, "logps_train/policy_1_l": -151.73641967773438, "logps_train/policy_1_w": -93.49253845214844, "logps_train/policy_2_2": -81.52922058105469, "logps_train/policy_2_w": -115.39596557617188, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.1820602416992188, "rewards_train/1-l": -1.0823577642440796, "rewards_train/1-w": 1.6271873712539673, "rewards_train/2-2": 1.1367018222808838, "rewards_train/2-w": 1.0625265836715698, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.709545135498047, "rewards_train/margins_1": 0.44512712955474854, "rewards_train/margins_2": 0.07417523860931396, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -116.68109893798828, "logps_train/policy_1_l": -140.146240234375, "logps_train/policy_1_w": -121.42079162597656, "logps_train/policy_2_2": -98.07185363769531, "logps_train/policy_2_w": -138.77484130859375, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.1607961654663086, "rewards_train/1-l": -0.6424569487571716, "rewards_train/1-w": 1.6704219579696655, "rewards_train/2-2": 1.6600019931793213, "rewards_train/2-w": 1.153766393661499, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.312878906726837, "rewards_train/margins_1": 0.5096257925033569, "rewards_train/margins_2": 0.5062355995178223, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -158.35580444335938, "logps_train/policy_1_l": -98.95907592773438, "logps_train/policy_1_w": -130.37290954589844, "logps_train/policy_2_2": -131.45144653320312, "logps_train/policy_2_w": -158.27011108398438, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.7929342985153198, "rewards_train/1-l": -0.8830170631408691, "rewards_train/1-w": 1.840834379196167, "rewards_train/2-2": 2.610322952270508, "rewards_train/2-w": 1.297990083694458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.723851442337036, "rewards_train/margins_1": 0.04790008068084717, "rewards_train/margins_2": 1.3123328685760498, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -210.33306884765625, "logps_train/policy_1_l": -179.06251525878906, "logps_train/policy_1_w": -128.756591796875, "logps_train/policy_2_2": -164.33956909179688, "logps_train/policy_2_w": -172.21726989746094, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.052631139755249, "rewards_train/1-l": -1.753908395767212, "rewards_train/1-w": 2.97482967376709, "rewards_train/2-2": 2.845730781555176, "rewards_train/2-w": 1.7591321468353271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.728738069534302, "rewards_train/margins_1": 0.9221985340118408, "rewards_train/margins_2": 1.0865986347198486, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -290.551513671875, "logps_train/policy_1_l": -214.49105834960938, "logps_train/policy_1_w": -183.51931762695312, "logps_train/policy_2_2": -228.38458251953125, "logps_train/policy_2_w": -236.40362548828125, "logps_train/ref_1_2": -312.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -268.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 2.232346534729004, "rewards_train/1-l": -1.824105143547058, "rewards_train/1-w": 3.104318618774414, "rewards_train/2-2": 3.8584165573120117, "rewards_train/2-w": 1.878387451171875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.928423762321472, "rewards_train/margins_1": 0.8719720840454102, "rewards_train/margins_2": 1.9800291061401367, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -125.7796630859375, "logps_train/policy_1_l": -157.6596221923828, "logps_train/policy_1_w": -86.03572082519531, "logps_train/policy_2_2": -103.71974182128906, "logps_train/policy_2_w": -102.22984313964844, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": 1.1884396076202393, "rewards_train/1-l": -1.1245561838150024, "rewards_train/1-w": 1.1069743633270264, "rewards_train/2-2": 1.140916347503662, "rewards_train/2-w": 0.6270159482955933, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.231530547142029, "rewards_train/margins_1": -0.08146524429321289, "rewards_train/margins_2": 0.5139003992080688, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -179.28302001953125, "logps_train/policy_1_l": -161.93585205078125, "logps_train/policy_1_w": -131.6827850341797, "logps_train/policy_2_2": -150.02487182617188, "logps_train/policy_2_w": -173.10498046875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.453729510307312, "rewards_train/1-l": -1.4258116483688354, "rewards_train/1-w": 1.2686355113983154, "rewards_train/2-2": 1.4320836067199707, "rewards_train/2-w": 0.48715755343437195, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.694447159767151, "rewards_train/margins_1": 0.8149060010910034, "rewards_train/margins_2": 0.9449260532855988, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -157.32421875, "logps_train/policy_1_l": -156.46035766601562, "logps_train/policy_1_w": -121.00863647460938, "logps_train/policy_2_2": -120.29340362548828, "logps_train/policy_2_w": -160.297607421875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.7042980790138245, "rewards_train/1-l": -1.4489655494689941, "rewards_train/1-w": 2.194448709487915, "rewards_train/2-2": 1.677690863609314, "rewards_train/2-w": 1.1858634948730469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.643414258956909, "rewards_train/margins_1": 1.4901506304740906, "rewards_train/margins_2": 0.4918273687362671, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -85.21041107177734, "logps_train/policy_1_l": -85.34129333496094, "logps_train/policy_1_w": -95.24748229980469, "logps_train/policy_2_2": -74.17269897460938, "logps_train/policy_2_w": -113.92322540283203, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.9969277381896973, "rewards_train/1-l": -1.2583484649658203, "rewards_train/1-w": 1.2479087114334106, "rewards_train/2-2": 1.393667221069336, "rewards_train/2-w": 0.7318961024284363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.506257176399231, "rewards_train/margins_1": 0.2509809732437134, "rewards_train/margins_2": 0.6617711186408997, "step": 63 }, { "epoch": 0.19, "learning_rate": 4.9726004683054105e-06, "loss": 0.857, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -112.09580993652344, "logps_train/policy_1_l": -98.35578155517578, "logps_train/policy_1_w": -107.3848876953125, "logps_train/policy_2_2": -91.65262603759766, "logps_train/policy_2_w": -136.04486083984375, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6216695308685303, "rewards_train/1-l": -0.8894845843315125, "rewards_train/1-w": 1.8177604675292969, "rewards_train/2-2": 2.0456748008728027, "rewards_train/2-w": 1.1720764636993408, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7072450518608093, "rewards_train/margins_1": 0.1960909366607666, "rewards_train/margins_2": 0.8735983371734619, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -150.9354705810547, "logps_train/policy_1_l": -123.18658447265625, "logps_train/policy_1_w": -98.63735961914062, "logps_train/policy_2_2": -132.490966796875, "logps_train/policy_2_w": -109.47381591796875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.4845774173736572, "rewards_train/1-l": -1.1765685081481934, "rewards_train/1-w": 0.6784510612487793, "rewards_train/2-2": 1.9118403196334839, "rewards_train/2-w": 0.4815242886543274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8550195693969727, "rewards_train/margins_1": -0.8061263561248779, "rewards_train/margins_2": 1.4303160309791565, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -181.4368896484375, "logps_train/policy_1_l": -138.9434356689453, "logps_train/policy_1_w": -140.64828491210938, "logps_train/policy_2_2": -153.6243896484375, "logps_train/policy_2_w": -175.3726806640625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.859437108039856, "rewards_train/1-l": -0.5615317225456238, "rewards_train/1-w": 1.5101712942123413, "rewards_train/2-2": 1.514123558998108, "rewards_train/2-w": 0.672106921672821, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.071703016757965, "rewards_train/margins_1": 0.6507341861724854, "rewards_train/margins_2": 0.8420166373252869, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -118.03214263916016, "logps_train/policy_1_l": -73.13705444335938, "logps_train/policy_1_w": -66.21463012695312, "logps_train/policy_2_2": -97.2347412109375, "logps_train/policy_2_w": -89.41471099853516, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.6374109983444214, "rewards_train/1-l": -0.5922209620475769, "rewards_train/1-w": 1.39259934425354, "rewards_train/2-2": 1.0726196765899658, "rewards_train/2-w": 0.8866536021232605, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.984820306301117, "rewards_train/margins_1": 0.7551883459091187, "rewards_train/margins_2": 0.18596607446670532, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -258.8659973144531, "logps_train/policy_1_l": -234.78244018554688, "logps_train/policy_1_w": -165.5828857421875, "logps_train/policy_2_2": -209.19906616210938, "logps_train/policy_2_w": -216.57949829101562, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.6571495532989502, "rewards_train/1-l": -2.0923070907592773, "rewards_train/1-w": 2.1292128562927246, "rewards_train/2-2": 2.855093479156494, "rewards_train/2-w": 1.70455002784729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.221519947052002, "rewards_train/margins_1": 0.4720633029937744, "rewards_train/margins_2": 1.150543451309204, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -145.3299560546875, "logps_train/policy_1_l": -170.01296997070312, "logps_train/policy_1_w": -72.44693756103516, "logps_train/policy_2_2": -128.0704803466797, "logps_train/policy_2_w": -91.44666290283203, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.6334108710289001, "rewards_train/1-l": -1.6073517799377441, "rewards_train/1-w": 1.4240564107894897, "rewards_train/2-2": 1.1242021322250366, "rewards_train/2-w": 0.9076780080795288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.031408190727234, "rewards_train/margins_1": 0.7906455397605896, "rewards_train/margins_2": 0.2165241241455078, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -137.9226837158203, "logps_train/policy_1_l": -154.59617614746094, "logps_train/policy_1_w": -122.62405395507812, "logps_train/policy_2_2": -112.50605773925781, "logps_train/policy_2_w": -152.36276245117188, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.48898178339004517, "rewards_train/1-l": -0.5537581443786621, "rewards_train/1-w": 1.1438452005386353, "rewards_train/2-2": 1.1337698698043823, "rewards_train/2-w": 0.8293487429618835, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.6976033449172974, "rewards_train/margins_1": 0.6548634171485901, "rewards_train/margins_2": 0.3044211268424988, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -179.12030029296875, "logps_train/policy_1_l": -159.53488159179688, "logps_train/policy_1_w": -143.5416259765625, "logps_train/policy_2_2": -156.1579132080078, "logps_train/policy_2_w": -172.1351318359375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.7942206859588623, "rewards_train/1-l": -1.3575890064239502, "rewards_train/1-w": 2.022400379180908, "rewards_train/2-2": 2.218583583831787, "rewards_train/2-w": 1.3153927326202393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3799893856048584, "rewards_train/margins_1": 0.2281796932220459, "rewards_train/margins_2": 0.9031908512115479, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -174.75909423828125, "logps_train/policy_1_l": -229.35678100585938, "logps_train/policy_1_w": -186.68399047851562, "logps_train/policy_2_2": -140.36386108398438, "logps_train/policy_2_w": -233.13870239257812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.1334655284881592, "rewards_train/1-l": -2.691927909851074, "rewards_train/1-w": 3.773007869720459, "rewards_train/2-2": 1.6573636531829834, "rewards_train/2-w": 2.5158185958862305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 6.464935779571533, "rewards_train/margins_1": 2.6395423412323, "rewards_train/margins_2": -0.8584549427032471, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -97.40104675292969, "logps_train/policy_1_l": -118.34880828857422, "logps_train/policy_1_w": -102.16989135742188, "logps_train/policy_2_2": -80.17471313476562, "logps_train/policy_2_w": -125.15530395507812, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.4066723585128784, "rewards_train/1-l": -1.0723810195922852, "rewards_train/1-w": 1.3422880172729492, "rewards_train/2-2": 0.5365325212478638, "rewards_train/2-w": 0.9009736776351929, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4146690368652344, "rewards_train/margins_1": 0.9356156587600708, "rewards_train/margins_2": -0.3644411563873291, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -207.46287536621094, "logps_train/policy_1_l": -197.330078125, "logps_train/policy_1_w": -195.2651824951172, "logps_train/policy_2_2": -182.9525909423828, "logps_train/policy_2_w": -222.2279052734375, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.2005871534347534, "rewards_train/1-l": -1.0482419729232788, "rewards_train/1-w": 1.5966262817382812, "rewards_train/2-2": 1.7063040733337402, "rewards_train/2-w": 0.7842400074005127, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.64486825466156, "rewards_train/margins_1": 0.39603912830352783, "rewards_train/margins_2": 0.9220640659332275, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -146.52224731445312, "logps_train/policy_1_l": -199.99966430664062, "logps_train/policy_1_w": -127.85453796386719, "logps_train/policy_2_2": -112.4586410522461, "logps_train/policy_2_w": -168.19332885742188, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.6536345481872559, "rewards_train/1-l": -2.410414695739746, "rewards_train/1-w": 2.0739216804504395, "rewards_train/2-2": 1.1681981086730957, "rewards_train/2-w": 1.0447285175323486, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.4843363761901855, "rewards_train/margins_1": 1.4202871322631836, "rewards_train/margins_2": 0.12346959114074707, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -184.7454833984375, "logps_train/policy_1_l": -204.49432373046875, "logps_train/policy_1_w": -158.153564453125, "logps_train/policy_2_2": -154.2432098388672, "logps_train/policy_2_w": -193.24343872070312, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.7348248958587646, "rewards_train/1-l": -1.7939643859863281, "rewards_train/1-w": 1.650268793106079, "rewards_train/2-2": 2.3256783485412598, "rewards_train/2-w": 0.985031008720398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4442331790924072, "rewards_train/margins_1": -0.08455610275268555, "rewards_train/margins_2": 1.3406473398208618, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -198.6947479248047, "logps_train/policy_1_l": -114.26557159423828, "logps_train/policy_1_w": -155.0702362060547, "logps_train/policy_2_2": -180.77198791503906, "logps_train/policy_2_w": -179.4584197998047, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.9977119565010071, "rewards_train/1-l": -0.5164015889167786, "rewards_train/1-w": 1.655476689338684, "rewards_train/2-2": 1.5243638753890991, "rewards_train/2-w": 0.9635334014892578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1718782782554626, "rewards_train/margins_1": 0.657764732837677, "rewards_train/margins_2": 0.5608304738998413, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -175.9449462890625, "logps_train/policy_1_l": -110.01982879638672, "logps_train/policy_1_w": -128.76974487304688, "logps_train/policy_2_2": -140.22732543945312, "logps_train/policy_2_w": -159.60809326171875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 0.8652706146240234, "rewards_train/1-l": -0.9998341798782349, "rewards_train/1-w": 2.090993881225586, "rewards_train/2-2": 1.9096895456314087, "rewards_train/2-w": 1.3680963516235352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.090828061103821, "rewards_train/margins_1": 1.2257232666015625, "rewards_train/margins_2": 0.5415931940078735, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -229.67181396484375, "logps_train/policy_1_l": -339.56591796875, "logps_train/policy_1_w": -228.14149475097656, "logps_train/policy_2_2": -194.7994384765625, "logps_train/policy_2_w": -279.1968994140625, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -298.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -292.0, "rewards_train/1-2": 1.2593815326690674, "rewards_train/1-l": -4.169092178344727, "rewards_train/1-w": 2.1046009063720703, "rewards_train/2-2": 1.8130249977111816, "rewards_train/2-w": 1.291250228881836, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.273693084716797, "rewards_train/margins_1": 0.8452193737030029, "rewards_train/margins_2": 0.5217747688293457, "step": 65 }, { "epoch": 0.2, "learning_rate": 4.96883328387375e-06, "loss": 0.941, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -125.97085571289062, "logps_train/policy_1_l": -178.56329345703125, "logps_train/policy_1_w": -95.93499755859375, "logps_train/policy_2_2": -105.03907012939453, "logps_train/policy_2_w": -129.25112915039062, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.4400242567062378, "rewards_train/1-l": -2.736016273498535, "rewards_train/1-w": 1.8494685888290405, "rewards_train/2-2": 1.7309077978134155, "rewards_train/2-w": 1.0600440502166748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.585484862327576, "rewards_train/margins_1": 0.40944433212280273, "rewards_train/margins_2": 0.6708637475967407, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -124.88645935058594, "logps_train/policy_1_l": -133.2924346923828, "logps_train/policy_1_w": -93.83988189697266, "logps_train/policy_2_2": -104.57362365722656, "logps_train/policy_2_w": -109.6728515625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 1.6011977195739746, "rewards_train/1-l": -1.170649528503418, "rewards_train/1-w": 1.6785115003585815, "rewards_train/2-2": 1.7309186458587646, "rewards_train/2-w": 1.1827149391174316, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8491610288619995, "rewards_train/margins_1": 0.07731378078460693, "rewards_train/margins_2": 0.548203706741333, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -72.28287506103516, "logps_train/policy_1_l": -124.99629211425781, "logps_train/policy_1_w": -156.58763122558594, "logps_train/policy_2_2": -67.43653869628906, "logps_train/policy_2_w": -180.96527099609375, "logps_train/ref_1_2": -77.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -71.5, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 0.5055017471313477, "rewards_train/1-l": -1.0838086605072021, "rewards_train/1-w": 2.3099875450134277, "rewards_train/2-2": 0.3983384966850281, "rewards_train/2-w": 1.8753478527069092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 3.39379620552063, "rewards_train/margins_1": 1.80448579788208, "rewards_train/margins_2": -1.477009356021881, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -190.53353881835938, "logps_train/policy_1_l": -250.60867309570312, "logps_train/policy_1_w": -164.38818359375, "logps_train/policy_2_2": -159.23794555664062, "logps_train/policy_2_w": -196.65330505371094, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.9193010330200195, "rewards_train/1-l": -4.045279026031494, "rewards_train/1-w": 2.019774913787842, "rewards_train/2-2": 2.5683934688568115, "rewards_train/2-w": 1.5002938508987427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 6.065053939819336, "rewards_train/margins_1": 0.10047388076782227, "rewards_train/margins_2": 1.0680996179580688, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -239.23187255859375, "logps_train/policy_1_l": -276.1604309082031, "logps_train/policy_1_w": -220.0924072265625, "logps_train/policy_2_2": -212.11087036132812, "logps_train/policy_2_w": -253.29969787597656, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -246.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.1705634593963623, "rewards_train/1-l": -2.226101875305176, "rewards_train/1-w": 2.5682010650634766, "rewards_train/2-2": 2.7889137268066406, "rewards_train/2-w": 1.9215936660766602, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.794302940368652, "rewards_train/margins_1": 0.39763760566711426, "rewards_train/margins_2": 0.8673200607299805, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -161.40597534179688, "logps_train/policy_1_l": -113.3278579711914, "logps_train/policy_1_w": -132.60324096679688, "logps_train/policy_2_2": -139.0668182373047, "logps_train/policy_2_w": -168.86376953125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.0851835012435913, "rewards_train/1-l": -0.7497774362564087, "rewards_train/1-w": 1.8428001403808594, "rewards_train/2-2": 1.6558187007904053, "rewards_train/2-w": 0.6737778186798096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.592577576637268, "rewards_train/margins_1": 0.7576166391372681, "rewards_train/margins_2": 0.9820408821105957, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -229.85545349121094, "logps_train/policy_1_l": -230.10316467285156, "logps_train/policy_1_w": -195.3604278564453, "logps_train/policy_2_2": -195.33810424804688, "logps_train/policy_2_w": -231.96978759765625, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.743751049041748, "rewards_train/1-l": -2.362269401550293, "rewards_train/1-w": 2.7139573097229004, "rewards_train/2-2": 2.77829909324646, "rewards_train/2-w": 2.0092720985412598, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.076226711273193, "rewards_train/margins_1": 0.9702062606811523, "rewards_train/margins_2": 0.7690269947052002, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -153.264892578125, "logps_train/policy_1_l": -185.03973388671875, "logps_train/policy_1_w": -167.6202392578125, "logps_train/policy_2_2": -130.49197387695312, "logps_train/policy_2_w": -184.09288024902344, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.6610114574432373, "rewards_train/1-l": -1.9231135845184326, "rewards_train/1-w": 1.7993035316467285, "rewards_train/2-2": 2.1703338623046875, "rewards_train/2-w": 1.2875868082046509, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.722417116165161, "rewards_train/margins_1": 0.1382920742034912, "rewards_train/margins_2": 0.8827470541000366, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -153.49179077148438, "logps_train/policy_1_l": -199.34542846679688, "logps_train/policy_1_w": -168.79156494140625, "logps_train/policy_2_2": -128.50350952148438, "logps_train/policy_2_w": -201.72470092773438, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.3914448320865631, "rewards_train/1-l": -1.818917155265808, "rewards_train/1-w": 2.192718505859375, "rewards_train/2-2": 1.0308988094329834, "rewards_train/2-w": 1.4619057178497314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.011635661125183, "rewards_train/margins_1": 1.801273673772812, "rewards_train/margins_2": -0.43100690841674805, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -210.38916015625, "logps_train/policy_1_l": -277.2013854980469, "logps_train/policy_1_w": -181.37429809570312, "logps_train/policy_2_2": -179.9176025390625, "logps_train/policy_2_w": -209.54200744628906, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.292333960533142, "rewards_train/1-l": -1.5404523611068726, "rewards_train/1-w": 1.8828816413879395, "rewards_train/2-2": 1.9332382678985596, "rewards_train/2-w": 1.4551738500595093, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.423334002494812, "rewards_train/margins_1": 0.5905476808547974, "rewards_train/margins_2": 0.4780644178390503, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -223.49066162109375, "logps_train/policy_1_l": -195.17532348632812, "logps_train/policy_1_w": -190.61964416503906, "logps_train/policy_2_2": -185.7642822265625, "logps_train/policy_2_w": -226.77438354492188, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.7368717193603516, "rewards_train/1-l": -2.2159695625305176, "rewards_train/1-w": 2.7911605834960938, "rewards_train/2-2": 2.5899782180786133, "rewards_train/2-w": 1.878812313079834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.007130146026611, "rewards_train/margins_1": 1.0542888641357422, "rewards_train/margins_2": 0.7111659049987793, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -122.50125122070312, "logps_train/policy_1_l": -91.58492279052734, "logps_train/policy_1_w": -142.54049682617188, "logps_train/policy_2_2": -101.39472961425781, "logps_train/policy_2_w": -168.77627563476562, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.1217501163482666, "rewards_train/1-l": -0.8600553870201111, "rewards_train/1-w": 2.558450698852539, "rewards_train/2-2": 1.7074016332626343, "rewards_train/2-w": 1.946592092514038, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.41850608587265, "rewards_train/margins_1": 1.4367005825042725, "rewards_train/margins_2": -0.2391904592514038, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -125.31990051269531, "logps_train/policy_1_l": -101.11502075195312, "logps_train/policy_1_w": -72.07980346679688, "logps_train/policy_2_2": -100.24418640136719, "logps_train/policy_2_w": -89.47694396972656, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.675040602684021, "rewards_train/1-l": -1.189626693725586, "rewards_train/1-w": 1.2154573202133179, "rewards_train/2-2": 1.4912059307098389, "rewards_train/2-w": 0.839805006980896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.405084013938904, "rewards_train/margins_1": 0.5404167175292969, "rewards_train/margins_2": 0.6514009237289429, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -86.46442413330078, "logps_train/policy_1_l": -115.59162902832031, "logps_train/policy_1_w": -90.21214294433594, "logps_train/policy_2_2": -71.60006713867188, "logps_train/policy_2_w": -110.90087890625, "logps_train/ref_1_2": -93.5, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.6996515393257141, "rewards_train/1-l": -0.8356276750564575, "rewards_train/1-w": 1.3781019449234009, "rewards_train/2-2": 0.9669467210769653, "rewards_train/2-w": 0.9560055136680603, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2137296199798584, "rewards_train/margins_1": 0.6784504055976868, "rewards_train/margins_2": 0.01094120740890503, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -93.95905303955078, "logps_train/policy_1_l": -93.28784942626953, "logps_train/policy_1_w": -74.52517700195312, "logps_train/policy_2_2": -75.92456817626953, "logps_train/policy_2_w": -89.82644653320312, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.9169855117797852, "rewards_train/1-l": -0.9447270035743713, "rewards_train/1-w": 0.8469692468643188, "rewards_train/2-2": 1.3887929916381836, "rewards_train/2-w": 0.3439178168773651, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7916962504386902, "rewards_train/margins_1": -0.07001626491546631, "rewards_train/margins_2": 1.0448751747608185, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -162.44793701171875, "logps_train/policy_1_l": -234.99908447265625, "logps_train/policy_1_w": -171.65261840820312, "logps_train/policy_2_2": -136.75936889648438, "logps_train/policy_2_w": -197.83926391601562, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.877082347869873, "rewards_train/1-l": -1.9741268157958984, "rewards_train/1-w": 2.0105204582214355, "rewards_train/2-2": 2.530313014984131, "rewards_train/2-w": 1.3293547630310059, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.984647274017334, "rewards_train/margins_1": 0.1334381103515625, "rewards_train/margins_2": 1.200958251953125, "step": 67 }, { "epoch": 0.2, "learning_rate": 4.96482514566587e-06, "loss": 0.819, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -313.53173828125, "logps_train/policy_1_l": -217.17410278320312, "logps_train/policy_1_w": -246.04580688476562, "logps_train/policy_2_2": -268.39532470703125, "logps_train/policy_2_w": -301.93255615234375, "logps_train/ref_1_2": -336.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -304.0, "logps_train/ref_2_w": -322.0, "rewards_train/1-2": 2.165574789047241, "rewards_train/1-l": -1.8986592292785645, "rewards_train/1-w": 3.201669216156006, "rewards_train/2-2": 3.5198419094085693, "rewards_train/2-w": 1.8879966735839844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.10032844543457, "rewards_train/margins_1": 1.0360944271087646, "rewards_train/margins_2": 1.631845235824585, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -181.1165771484375, "logps_train/policy_1_l": -152.7281494140625, "logps_train/policy_1_w": -119.97210693359375, "logps_train/policy_2_2": -152.185302734375, "logps_train/policy_2_w": -147.49371337890625, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.3352184295654297, "rewards_train/1-l": -1.6368763446807861, "rewards_train/1-w": 0.8121644258499146, "rewards_train/2-2": 1.9377200603485107, "rewards_train/2-w": 0.44281643629074097, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4490407705307007, "rewards_train/margins_1": -0.5230540037155151, "rewards_train/margins_2": 1.4949036240577698, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -257.7920227050781, "logps_train/policy_1_l": -186.1868896484375, "logps_train/policy_1_w": -154.04632568359375, "logps_train/policy_2_2": -211.356201171875, "logps_train/policy_2_w": -197.3063201904297, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.1879856437444687, "rewards_train/1-l": -1.083922028541565, "rewards_train/1-w": 1.9785716533660889, "rewards_train/2-2": 1.461646318435669, "rewards_train/2-w": 1.3486649990081787, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.062493681907654, "rewards_train/margins_1": 1.7905860096216202, "rewards_train/margins_2": 0.11298131942749023, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -145.26028442382812, "logps_train/policy_1_l": -169.07077026367188, "logps_train/policy_1_w": -121.97225952148438, "logps_train/policy_2_2": -122.12487030029297, "logps_train/policy_2_w": -148.69326782226562, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.7825651168823242, "rewards_train/1-l": -2.0076632499694824, "rewards_train/1-w": 1.3234773874282837, "rewards_train/2-2": 1.1511845588684082, "rewards_train/2-w": 0.725203812122345, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.331140637397766, "rewards_train/margins_1": 0.5409122705459595, "rewards_train/margins_2": 0.42598074674606323, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -176.8416748046875, "logps_train/policy_1_l": -137.34609985351562, "logps_train/policy_1_w": -131.22796630859375, "logps_train/policy_2_2": -153.03311157226562, "logps_train/policy_2_w": -161.017333984375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.9744259119033813, "rewards_train/1-l": -1.656874656677246, "rewards_train/1-w": 1.851030945777893, "rewards_train/2-2": 2.706064462661743, "rewards_train/2-w": 0.9521735906600952, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.507905602455139, "rewards_train/margins_1": -0.12339496612548828, "rewards_train/margins_2": 1.753890872001648, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -88.51042175292969, "logps_train/policy_1_l": -147.8496856689453, "logps_train/policy_1_w": -104.53302764892578, "logps_train/policy_2_2": -78.52433776855469, "logps_train/policy_2_w": -120.37898254394531, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.325519859790802, "rewards_train/1-l": -0.8873131275177002, "rewards_train/1-w": 1.1793144941329956, "rewards_train/2-2": 0.4585033357143402, "rewards_train/2-w": 0.6720627546310425, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.066627621650696, "rewards_train/margins_1": 0.8537946343421936, "rewards_train/margins_2": -0.21355941891670227, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -178.82057189941406, "logps_train/policy_1_l": -246.306640625, "logps_train/policy_1_w": -185.77963256835938, "logps_train/policy_2_2": -144.20358276367188, "logps_train/policy_2_w": -230.05950927734375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 0.6741925477981567, "rewards_train/1-l": -2.1994142532348633, "rewards_train/1-w": 1.8650054931640625, "rewards_train/2-2": 1.4983912706375122, "rewards_train/2-w": 0.8526424169540405, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.064419746398926, "rewards_train/margins_1": 1.1908129453659058, "rewards_train/margins_2": 0.6457488536834717, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -238.18885803222656, "logps_train/policy_1_l": -250.37481689453125, "logps_train/policy_1_w": -195.997314453125, "logps_train/policy_2_2": -198.81097412109375, "logps_train/policy_2_w": -234.0225372314453, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.1377540826797485, "rewards_train/1-l": -2.394123077392578, "rewards_train/1-w": 2.5198001861572266, "rewards_train/2-2": 1.7152889966964722, "rewards_train/2-w": 1.3813395500183105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.913923263549805, "rewards_train/margins_1": 1.382046103477478, "rewards_train/margins_2": 0.3339494466781616, "step": 68 }, { "epoch": 0.21, "logps_train/policy_1_2": -112.49543762207031, "logps_train/policy_1_l": -78.63066101074219, "logps_train/policy_1_w": -57.13618850708008, "logps_train/policy_2_2": -87.77255249023438, "logps_train/policy_2_w": -75.07691192626953, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -70.5, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -79.5, "rewards_train/1-2": 0.986393392086029, "rewards_train/1-l": -0.8185350894927979, "rewards_train/1-w": 0.8746623992919922, "rewards_train/2-2": 1.3705971240997314, "rewards_train/2-w": 0.44631290435791016, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.69319748878479, "rewards_train/margins_1": -0.11173099279403687, "rewards_train/margins_2": 0.9242842197418213, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -129.61209106445312, "logps_train/policy_1_l": -130.2786407470703, "logps_train/policy_1_w": -129.701416015625, "logps_train/policy_2_2": -113.9386978149414, "logps_train/policy_2_w": -153.61434936523438, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.5184780359268188, "rewards_train/1-l": -0.9700518846511841, "rewards_train/1-w": 1.5267329216003418, "rewards_train/2-2": 1.6326934099197388, "rewards_train/2-w": 0.9276266694068909, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.496784806251526, "rewards_train/margins_1": 0.00825488567352295, "rewards_train/margins_2": 0.7050667405128479, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -99.4272232055664, "logps_train/policy_1_l": -111.08766174316406, "logps_train/policy_1_w": -94.33316802978516, "logps_train/policy_2_2": -73.98184204101562, "logps_train/policy_2_w": -125.54884338378906, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.1518092155456543, "rewards_train/1-l": -0.8088147640228271, "rewards_train/1-w": 1.718245029449463, "rewards_train/2-2": 0.7483001351356506, "rewards_train/2-w": 1.0662097930908203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.52705979347229, "rewards_train/margins_1": 1.5664358139038086, "rewards_train/margins_2": -0.3179096579551697, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -191.48887634277344, "logps_train/policy_1_l": -200.79116821289062, "logps_train/policy_1_w": -153.7152099609375, "logps_train/policy_2_2": -173.09629821777344, "logps_train/policy_2_w": -179.8748779296875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.3386127948760986, "rewards_train/1-l": -1.38224196434021, "rewards_train/1-w": 2.1237921714782715, "rewards_train/2-2": 1.8013076782226562, "rewards_train/2-w": 1.4250125885009766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5060341358184814, "rewards_train/margins_1": 0.7851793766021729, "rewards_train/margins_2": 0.3762950897216797, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -164.5007781982422, "logps_train/policy_1_l": -173.57534790039062, "logps_train/policy_1_w": -133.81820678710938, "logps_train/policy_2_2": -124.37677001953125, "logps_train/policy_2_w": -173.5875244140625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.5835168957710266, "rewards_train/1-l": -0.915347695350647, "rewards_train/1-w": 2.3329250812530518, "rewards_train/2-2": 1.6025567054748535, "rewards_train/2-w": 1.5908563137054443, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2482727766036987, "rewards_train/margins_1": 1.7494081854820251, "rewards_train/margins_2": 0.01170039176940918, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -133.42898559570312, "logps_train/policy_1_l": -166.61749267578125, "logps_train/policy_1_w": -118.52629852294922, "logps_train/policy_2_2": -100.71873474121094, "logps_train/policy_2_w": -141.39761352539062, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.2180390357971191, "rewards_train/1-l": -1.9812794923782349, "rewards_train/1-w": 1.9094793796539307, "rewards_train/2-2": 1.5796890258789062, "rewards_train/2-w": 1.2328959703445435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8907588720321655, "rewards_train/margins_1": 0.6914403438568115, "rewards_train/margins_2": 0.3467930555343628, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -126.86778259277344, "logps_train/policy_1_l": -143.02932739257812, "logps_train/policy_1_w": -120.50401306152344, "logps_train/policy_2_2": -108.67317962646484, "logps_train/policy_2_w": -152.65992736816406, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.48665833473205566, "rewards_train/1-l": -1.0255885124206543, "rewards_train/1-w": 1.7167850732803345, "rewards_train/2-2": 0.7411775588989258, "rewards_train/2-w": 1.247190237045288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.7423735857009888, "rewards_train/margins_1": 1.2301267385482788, "rewards_train/margins_2": -0.5060126781463623, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -132.7100830078125, "logps_train/policy_1_l": -188.27288818359375, "logps_train/policy_1_w": -69.16447448730469, "logps_train/policy_2_2": -109.57576751708984, "logps_train/policy_2_w": -81.84056854248047, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.8094596266746521, "rewards_train/1-l": -1.6503357887268066, "rewards_train/1-w": 0.628865122795105, "rewards_train/2-2": 1.326016902923584, "rewards_train/2-w": 0.6331306099891663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2792009115219116, "rewards_train/margins_1": -0.18059450387954712, "rewards_train/margins_2": 0.6928862929344177, "step": 69 }, { "epoch": 0.21, "learning_rate": 4.960576444868992e-06, "loss": 0.9228, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -161.73484802246094, "logps_train/policy_1_l": -184.82492065429688, "logps_train/policy_1_w": -143.9928436279297, "logps_train/policy_2_2": -121.77189636230469, "logps_train/policy_2_w": -190.88525390625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.02885901927948, "rewards_train/1-l": -1.9277070760726929, "rewards_train/1-w": 1.5389965772628784, "rewards_train/2-2": 2.1396076679229736, "rewards_train/2-w": 1.0017082691192627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4667036533355713, "rewards_train/margins_1": 0.5101375579833984, "rewards_train/margins_2": 1.137899398803711, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -163.01016235351562, "logps_train/policy_1_l": -244.46505737304688, "logps_train/policy_1_w": -248.3584747314453, "logps_train/policy_2_2": -133.0597686767578, "logps_train/policy_2_w": -285.5303955078125, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 1.1052334308624268, "rewards_train/1-l": -1.954709529876709, "rewards_train/1-w": 2.9860284328460693, "rewards_train/2-2": 1.6112098693847656, "rewards_train/2-w": 1.8813331127166748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.940737962722778, "rewards_train/margins_1": 1.8807950019836426, "rewards_train/margins_2": -0.2701232433319092, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -149.666015625, "logps_train/policy_1_l": -201.56082153320312, "logps_train/policy_1_w": -129.4357147216797, "logps_train/policy_2_2": -125.9477310180664, "logps_train/policy_2_w": -167.87115478515625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0025384426116943, "rewards_train/1-l": -2.344167709350586, "rewards_train/1-w": 1.8236159086227417, "rewards_train/2-2": 1.485695242881775, "rewards_train/2-w": 0.9550713300704956, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.167783617973328, "rewards_train/margins_1": 0.8210774660110474, "rewards_train/margins_2": 0.5306239128112793, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -240.70306396484375, "logps_train/policy_1_l": -265.6791076660156, "logps_train/policy_1_w": -243.94430541992188, "logps_train/policy_2_2": -202.35568237304688, "logps_train/policy_2_w": -300.9085998535156, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -320.0, "rewards_train/1-2": 1.8921937942504883, "rewards_train/1-l": -3.0147852897644043, "rewards_train/1-w": 3.8555703163146973, "rewards_train/2-2": 2.639432668685913, "rewards_train/2-w": 1.8153908252716064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.870355606079102, "rewards_train/margins_1": 1.963376522064209, "rewards_train/margins_2": 0.8240418434143066, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -144.63629150390625, "logps_train/policy_1_l": -179.94625854492188, "logps_train/policy_1_w": -135.48988342285156, "logps_train/policy_2_2": -123.4703140258789, "logps_train/policy_2_w": -167.10745239257812, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.4070740938186646, "rewards_train/1-l": -1.7553191184997559, "rewards_train/1-w": 2.106480360031128, "rewards_train/2-2": 1.8045306205749512, "rewards_train/2-w": 1.509958267211914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.861799478530884, "rewards_train/margins_1": 0.6994062662124634, "rewards_train/margins_2": 0.2945723533630371, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -126.52334594726562, "logps_train/policy_1_l": -129.48858642578125, "logps_train/policy_1_w": -114.8970947265625, "logps_train/policy_2_2": -103.05770874023438, "logps_train/policy_2_w": -142.27316284179688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.110164999961853, "rewards_train/1-l": -1.0910465717315674, "rewards_train/1-w": 2.2192745208740234, "rewards_train/2-2": 1.564150333404541, "rewards_train/2-w": 1.4140903949737549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.310321092605591, "rewards_train/margins_1": 1.1091095209121704, "rewards_train/margins_2": 0.15005993843078613, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -197.5233917236328, "logps_train/policy_1_l": -246.4835205078125, "logps_train/policy_1_w": -231.61376953125, "logps_train/policy_2_2": -167.7606201171875, "logps_train/policy_2_w": -268.7998046875, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": 1.7835978269577026, "rewards_train/1-l": -2.3608522415161133, "rewards_train/1-w": 3.0927236080169678, "rewards_train/2-2": 2.30987548828125, "rewards_train/2-w": 2.047170639038086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.453575849533081, "rewards_train/margins_1": 1.3091257810592651, "rewards_train/margins_2": 0.26270484924316406, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -69.89010620117188, "logps_train/policy_1_l": -55.51377487182617, "logps_train/policy_1_w": -57.1497917175293, "logps_train/policy_2_2": -54.70263671875, "logps_train/policy_2_w": -73.28077697753906, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -49.5, "logps_train/ref_1_w": -67.5, "logps_train/ref_2_2": -66.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 0.7820830345153809, "rewards_train/1-l": -0.5898542404174805, "rewards_train/1-w": 1.0237655639648438, "rewards_train/2-2": 1.189306616783142, "rewards_train/2-w": 0.594773530960083, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.6136198043823242, "rewards_train/margins_1": 0.2416825294494629, "rewards_train/margins_2": 0.5945330858230591, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -118.4405288696289, "logps_train/policy_1_l": -107.960693359375, "logps_train/policy_1_w": -76.02961730957031, "logps_train/policy_2_2": -93.01811218261719, "logps_train/policy_2_w": -95.18064880371094, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 0.990322470664978, "rewards_train/1-l": -0.9529058933258057, "rewards_train/1-w": 1.62985098361969, "rewards_train/2-2": 1.6497516632080078, "rewards_train/2-w": 1.2366230487823486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5827568769454956, "rewards_train/margins_1": 0.6395285129547119, "rewards_train/margins_2": 0.4131286144256592, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -102.02249145507812, "logps_train/policy_1_l": -108.45088958740234, "logps_train/policy_1_w": -80.12689208984375, "logps_train/policy_2_2": -87.42802429199219, "logps_train/policy_2_w": -92.92792510986328, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.717477023601532, "rewards_train/1-l": -1.1802453994750977, "rewards_train/1-w": 1.0953187942504883, "rewards_train/2-2": 1.1030956506729126, "rewards_train/2-w": 1.0325005054473877, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.275564193725586, "rewards_train/margins_1": 0.3778417706489563, "rewards_train/margins_2": 0.0705951452255249, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -131.4176025390625, "logps_train/policy_1_l": -168.125244140625, "logps_train/policy_1_w": -116.63872528076172, "logps_train/policy_2_2": -107.03162384033203, "logps_train/policy_2_w": -141.87850952148438, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.4676141738891602, "rewards_train/1-l": -1.5033438205718994, "rewards_train/1-w": 2.1306591033935547, "rewards_train/2-2": 1.8929316997528076, "rewards_train/2-w": 1.703554391860962, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.634002923965454, "rewards_train/margins_1": 0.6630449295043945, "rewards_train/margins_2": 0.1893773078918457, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -82.36504364013672, "logps_train/policy_1_l": -58.64190673828125, "logps_train/policy_1_w": -68.90696716308594, "logps_train/policy_2_2": -68.95384216308594, "logps_train/policy_2_w": -86.70050048828125, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 0.6244335174560547, "rewards_train/1-l": -0.30540138483047485, "rewards_train/1-w": 0.5073508024215698, "rewards_train/2-2": 0.8897720575332642, "rewards_train/2-w": 0.2760441303253174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.8127521872520447, "rewards_train/margins_1": -0.11708271503448486, "rewards_train/margins_2": 0.6137279272079468, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -130.18975830078125, "logps_train/policy_1_l": -129.18020629882812, "logps_train/policy_1_w": -106.06981658935547, "logps_train/policy_2_2": -97.98951721191406, "logps_train/policy_2_w": -134.00930786132812, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.876337468624115, "rewards_train/1-l": -0.7771997451782227, "rewards_train/1-w": 2.33676815032959, "rewards_train/2-2": 1.355736255645752, "rewards_train/2-w": 1.6053192615509033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1139678955078125, "rewards_train/margins_1": 1.4604306817054749, "rewards_train/margins_2": -0.24958300590515137, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -91.16124725341797, "logps_train/policy_1_l": -71.43968963623047, "logps_train/policy_1_w": -58.62785339355469, "logps_train/policy_2_2": -69.81463623046875, "logps_train/policy_2_w": -83.14677429199219, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 0.746375322341919, "rewards_train/1-l": -0.6463123559951782, "rewards_train/1-w": 1.0375077724456787, "rewards_train/2-2": 1.2767391204833984, "rewards_train/2-w": 0.6425492763519287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.683820128440857, "rewards_train/margins_1": 0.29113245010375977, "rewards_train/margins_2": 0.6341898441314697, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -192.02859497070312, "logps_train/policy_1_l": -201.50025939941406, "logps_train/policy_1_w": -101.21047973632812, "logps_train/policy_2_2": -162.03085327148438, "logps_train/policy_2_w": -117.32492065429688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.1932344436645508, "rewards_train/1-l": -2.0627214908599854, "rewards_train/1-w": 1.4687952995300293, "rewards_train/2-2": 2.054335117340088, "rewards_train/2-w": 1.2300082445144653, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5315167903900146, "rewards_train/margins_1": 0.2755608558654785, "rewards_train/margins_2": 0.8243268728256226, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -122.7889404296875, "logps_train/policy_1_l": -145.05023193359375, "logps_train/policy_1_w": -110.50228881835938, "logps_train/policy_2_2": -111.69950103759766, "logps_train/policy_2_w": -139.77926635742188, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.6125127077102661, "rewards_train/1-l": -0.4499438405036926, "rewards_train/1-w": 1.7263343334197998, "rewards_train/2-2": 0.7534877061843872, "rewards_train/2-w": 1.0236364603042603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.1762781739234924, "rewards_train/margins_1": 1.1138216257095337, "rewards_train/margins_2": -0.27014875411987305, "step": 71 }, { "epoch": 0.22, "learning_rate": 4.956087596148824e-06, "loss": 0.8625, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -231.5345458984375, "logps_train/policy_1_l": -237.44036865234375, "logps_train/policy_1_w": -170.2400360107422, "logps_train/policy_2_2": -206.28033447265625, "logps_train/policy_2_w": -208.30787658691406, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.877795934677124, "rewards_train/1-l": -2.9151313304901123, "rewards_train/1-w": 2.6541218757629395, "rewards_train/2-2": 2.190715789794922, "rewards_train/2-w": 1.7317113876342773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.569253206253052, "rewards_train/margins_1": 0.7763259410858154, "rewards_train/margins_2": 0.45900440216064453, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -188.33009338378906, "logps_train/policy_1_l": -174.3564453125, "logps_train/policy_1_w": -112.08352661132812, "logps_train/policy_2_2": -150.1802978515625, "logps_train/policy_2_w": -149.77439880371094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.8951157331466675, "rewards_train/1-l": -1.7200193405151367, "rewards_train/1-w": 1.7103971242904663, "rewards_train/2-2": 1.728844165802002, "rewards_train/2-w": 0.7475599646568298, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.430416464805603, "rewards_train/margins_1": 0.8152813911437988, "rewards_train/margins_2": 0.9812842011451721, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -112.22296905517578, "logps_train/policy_1_l": -130.39291381835938, "logps_train/policy_1_w": -109.77505493164062, "logps_train/policy_2_2": -94.03854370117188, "logps_train/policy_2_w": -135.8925018310547, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.7839529514312744, "rewards_train/1-l": -0.9174153804779053, "rewards_train/1-w": 1.296323299407959, "rewards_train/2-2": 1.1492704153060913, "rewards_train/2-w": 0.7959063649177551, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2137386798858643, "rewards_train/margins_1": 0.5123703479766846, "rewards_train/margins_2": 0.3533640503883362, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -214.41729736328125, "logps_train/policy_1_l": -160.04214477539062, "logps_train/policy_1_w": -160.2370147705078, "logps_train/policy_2_2": -184.1993408203125, "logps_train/policy_2_w": -193.73751831054688, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.4629582166671753, "rewards_train/1-l": -0.7792159914970398, "rewards_train/1-w": 2.44856333732605, "rewards_train/2-2": 2.4921741485595703, "rewards_train/2-w": 1.8332806825637817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2277793288230896, "rewards_train/margins_1": 0.9856051206588745, "rewards_train/margins_2": 0.6588934659957886, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -102.18940734863281, "logps_train/policy_1_l": -86.76597595214844, "logps_train/policy_1_w": -76.94207000732422, "logps_train/policy_2_2": -75.46340942382812, "logps_train/policy_2_w": -110.3135986328125, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.4392622411251068, "rewards_train/1-l": -0.9424179196357727, "rewards_train/1-w": 1.430793285369873, "rewards_train/2-2": 1.3126435279846191, "rewards_train/2-w": 0.8424680829048157, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3732112050056458, "rewards_train/margins_1": 0.9915310442447662, "rewards_train/margins_2": 0.47017544507980347, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -99.89906311035156, "logps_train/policy_1_l": -207.23785400390625, "logps_train/policy_1_w": -83.32608032226562, "logps_train/policy_2_2": -81.4173583984375, "logps_train/policy_2_w": -109.31830596923828, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.9335311055183411, "rewards_train/1-l": -2.841071128845215, "rewards_train/1-w": 1.6908290386199951, "rewards_train/2-2": 1.3371706008911133, "rewards_train/2-w": 0.9931689500808716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.53190016746521, "rewards_train/margins_1": 0.757297933101654, "rewards_train/margins_2": 0.3440016508102417, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -177.56663513183594, "logps_train/policy_1_l": -215.81796264648438, "logps_train/policy_1_w": -213.06129455566406, "logps_train/policy_2_2": -155.27786254882812, "logps_train/policy_2_w": -241.1137237548828, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.2995860576629639, "rewards_train/1-l": -1.5096781253814697, "rewards_train/1-w": 2.590745449066162, "rewards_train/2-2": 1.9472131729125977, "rewards_train/2-w": 1.8980026245117188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.100423574447632, "rewards_train/margins_1": 1.2911593914031982, "rewards_train/margins_2": 0.049210548400878906, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -193.7518310546875, "logps_train/policy_1_l": -189.01254272460938, "logps_train/policy_1_w": -148.5290985107422, "logps_train/policy_2_2": -155.40345764160156, "logps_train/policy_2_w": -178.91209411621094, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.7949352264404297, "rewards_train/1-l": -2.207894802093506, "rewards_train/1-w": 1.829707384109497, "rewards_train/2-2": 2.1207869052886963, "rewards_train/2-w": 1.3324240446090698, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.037602186203003, "rewards_train/margins_1": 1.0347721576690674, "rewards_train/margins_2": 0.7883628606796265, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -148.36737060546875, "logps_train/policy_1_l": -167.97572326660156, "logps_train/policy_1_w": -155.1208953857422, "logps_train/policy_2_2": -116.75283813476562, "logps_train/policy_2_w": -188.35968017578125, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.974200427532196, "rewards_train/1-l": -2.246009588241577, "rewards_train/1-w": 2.683222770690918, "rewards_train/2-2": 1.8184657096862793, "rewards_train/2-w": 1.715593934059143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.929232358932495, "rewards_train/margins_1": 1.709022343158722, "rewards_train/margins_2": 0.10287177562713623, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -150.17318725585938, "logps_train/policy_1_l": -193.58633422851562, "logps_train/policy_1_w": -113.42357635498047, "logps_train/policy_2_2": -121.33139038085938, "logps_train/policy_2_w": -148.1533203125, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.8733053803443909, "rewards_train/1-l": -1.525331735610962, "rewards_train/1-w": 1.9763920307159424, "rewards_train/2-2": 1.460611343383789, "rewards_train/2-w": 1.2721669673919678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5017237663269043, "rewards_train/margins_1": 1.1030866503715515, "rewards_train/margins_2": 0.1884443759918213, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -81.54144287109375, "logps_train/policy_1_l": -103.1290283203125, "logps_train/policy_1_w": -126.87882995605469, "logps_train/policy_2_2": -62.817054748535156, "logps_train/policy_2_w": -165.27403259277344, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.4005427658557892, "rewards_train/1-l": -0.34647154808044434, "rewards_train/1-w": 1.781649112701416, "rewards_train/2-2": 0.667513370513916, "rewards_train/2-w": 1.085096836090088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.1281206607818604, "rewards_train/margins_1": 1.3811063468456268, "rewards_train/margins_2": -0.4175834655761719, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -95.91361999511719, "logps_train/policy_1_l": -112.3645248413086, "logps_train/policy_1_w": -80.82137298583984, "logps_train/policy_2_2": -82.5359115600586, "logps_train/policy_2_w": -91.65676879882812, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.268794059753418, "rewards_train/1-l": -0.40676552057266235, "rewards_train/1-w": 0.8928627371788025, "rewards_train/2-2": 1.5143778324127197, "rewards_train/2-w": 0.5429165363311768, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.2996282577514648, "rewards_train/margins_1": -0.3759313225746155, "rewards_train/margins_2": 0.971461296081543, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -189.419921875, "logps_train/policy_1_l": -212.1388397216797, "logps_train/policy_1_w": -170.1813201904297, "logps_train/policy_2_2": -161.33250427246094, "logps_train/policy_2_w": -209.96368408203125, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.158008575439453, "rewards_train/1-l": -1.3525561094284058, "rewards_train/1-w": 2.1211256980895996, "rewards_train/2-2": 2.813624143600464, "rewards_train/2-w": 1.1708199977874756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4736818075180054, "rewards_train/margins_1": -0.036882877349853516, "rewards_train/margins_2": 1.6428041458129883, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -183.3044891357422, "logps_train/policy_1_l": -162.74072265625, "logps_train/policy_1_w": -172.72119140625, "logps_train/policy_2_2": -157.08338928222656, "logps_train/policy_2_w": -206.51478576660156, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.6570512056350708, "rewards_train/1-l": 0.0854974091053009, "rewards_train/1-w": 1.974755883216858, "rewards_train/2-2": 2.1260359287261963, "rewards_train/2-w": 1.2469589710235596, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.889258474111557, "rewards_train/margins_1": 0.3177046775817871, "rewards_train/margins_2": 0.8790769577026367, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -188.51663208007812, "logps_train/policy_1_l": -194.1963348388672, "logps_train/policy_1_w": -95.79209899902344, "logps_train/policy_2_2": -154.19668579101562, "logps_train/policy_2_w": -117.52210235595703, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.1069310903549194, "rewards_train/1-l": -2.320413589477539, "rewards_train/1-w": 1.7934463024139404, "rewards_train/2-2": 1.8943932056427002, "rewards_train/2-w": 1.402477502822876, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.1138598918914795, "rewards_train/margins_1": 0.686515212059021, "rewards_train/margins_2": 0.4919157028198242, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -241.10714721679688, "logps_train/policy_1_l": -165.79156494140625, "logps_train/policy_1_w": -173.72799682617188, "logps_train/policy_2_2": -212.89068603515625, "logps_train/policy_2_w": -206.01177978515625, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.045536518096924, "rewards_train/1-l": -0.8072813153266907, "rewards_train/1-w": 1.8209497928619385, "rewards_train/2-2": 2.9265553951263428, "rewards_train/2-w": 0.926946222782135, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.628231108188629, "rewards_train/margins_1": -0.22458672523498535, "rewards_train/margins_2": 1.9996091723442078, "step": 73 }, { "epoch": 0.22, "learning_rate": 4.951359037609088e-06, "loss": 0.8153, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -158.86373901367188, "logps_train/policy_1_l": -220.87911987304688, "logps_train/policy_1_w": -116.79789733886719, "logps_train/policy_2_2": -130.8494415283203, "logps_train/policy_2_w": -151.3355712890625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.544877052307129, "rewards_train/1-l": -1.797287106513977, "rewards_train/1-w": 1.4764599800109863, "rewards_train/2-2": 1.7158366441726685, "rewards_train/2-w": 0.5617552995681763, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2737470865249634, "rewards_train/margins_1": -0.06841707229614258, "rewards_train/margins_2": 1.1540813446044922, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -233.61492919921875, "logps_train/policy_1_l": -157.24896240234375, "logps_train/policy_1_w": -119.78036499023438, "logps_train/policy_2_2": -192.85400390625, "logps_train/policy_2_w": -143.92840576171875, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.428350806236267, "rewards_train/1-l": -0.6640554070472717, "rewards_train/1-w": 1.014151692390442, "rewards_train/2-2": 2.3841300010681152, "rewards_train/2-w": 0.5649707913398743, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.6782070994377136, "rewards_train/margins_1": -0.4141991138458252, "rewards_train/margins_2": 1.819159209728241, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -128.24862670898438, "logps_train/policy_1_l": -213.67868041992188, "logps_train/policy_1_w": -163.32687377929688, "logps_train/policy_2_2": -110.4425048828125, "logps_train/policy_2_w": -186.4510498046875, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 0.9626368284225464, "rewards_train/1-l": -1.8366162776947021, "rewards_train/1-w": 1.7743442058563232, "rewards_train/2-2": 1.261999249458313, "rewards_train/2-w": 1.115051031112671, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6109604835510254, "rewards_train/margins_1": 0.8117073774337769, "rewards_train/margins_2": 0.1469482183456421, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -197.3570556640625, "logps_train/policy_1_l": -249.79562377929688, "logps_train/policy_1_w": -178.9512939453125, "logps_train/policy_2_2": -163.7626190185547, "logps_train/policy_2_w": -217.65542602539062, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.101794719696045, "rewards_train/1-l": -2.217843770980835, "rewards_train/1-w": 2.7861196994781494, "rewards_train/2-2": 1.9549884796142578, "rewards_train/2-w": 1.6532082557678223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.003963470458984, "rewards_train/margins_1": 1.6843249797821045, "rewards_train/margins_2": 0.30178022384643555, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -167.2093505859375, "logps_train/policy_1_l": -193.98776245117188, "logps_train/policy_1_w": -202.22427368164062, "logps_train/policy_2_2": -135.366943359375, "logps_train/policy_2_w": -243.13714599609375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 0.9271121025085449, "rewards_train/1-l": -1.7214323282241821, "rewards_train/1-w": 2.053354501724243, "rewards_train/2-2": 2.2258059978485107, "rewards_train/2-w": 0.7550357580184937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7747868299484253, "rewards_train/margins_1": 1.1262423992156982, "rewards_train/margins_2": 1.470770239830017, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -162.66468811035156, "logps_train/policy_1_l": -133.781494140625, "logps_train/policy_1_w": -141.2017822265625, "logps_train/policy_2_2": -137.86155700683594, "logps_train/policy_2_w": -163.87319946289062, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.373765468597412, "rewards_train/1-l": -1.0588126182556152, "rewards_train/1-w": 2.6329472064971924, "rewards_train/2-2": 1.8670676946640015, "rewards_train/2-w": 1.8439298868179321, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6917598247528076, "rewards_train/margins_1": 1.2591817378997803, "rewards_train/margins_2": 0.023137807846069336, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -147.76101684570312, "logps_train/policy_1_l": -54.379058837890625, "logps_train/policy_1_w": -72.80428314208984, "logps_train/policy_2_2": -124.79629516601562, "logps_train/policy_2_w": -89.42623901367188, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -49.25, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.8778040409088135, "rewards_train/1-l": -0.5125154256820679, "rewards_train/1-w": 0.8367596864700317, "rewards_train/2-2": 1.35591721534729, "rewards_train/2-w": 0.3339381217956543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.3492751121520996, "rewards_train/margins_1": -0.04104435443878174, "rewards_train/margins_2": 1.0219790935516357, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -131.66534423828125, "logps_train/policy_1_l": -130.39129638671875, "logps_train/policy_1_w": -125.04011535644531, "logps_train/policy_2_2": -105.92359161376953, "logps_train/policy_2_w": -156.74789428710938, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.0850276947021484, "rewards_train/1-l": -0.482878714799881, "rewards_train/1-w": 1.3366129398345947, "rewards_train/2-2": 1.6658440828323364, "rewards_train/2-w": 1.0033364295959473, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8194916546344757, "rewards_train/margins_1": 0.2515852451324463, "rewards_train/margins_2": 0.6625076532363892, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -140.61602783203125, "logps_train/policy_1_l": -156.1497039794922, "logps_train/policy_1_w": -96.08193969726562, "logps_train/policy_2_2": -123.14324951171875, "logps_train/policy_2_w": -110.62338256835938, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.2266783714294434, "rewards_train/1-l": -1.1792036294937134, "rewards_train/1-w": 0.957332968711853, "rewards_train/2-2": 1.8528621196746826, "rewards_train/2-w": 0.7716460227966309, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1365365982055664, "rewards_train/margins_1": -0.26934540271759033, "rewards_train/margins_2": 1.0812160968780518, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -117.85222625732422, "logps_train/policy_1_l": -128.78854370117188, "logps_train/policy_1_w": -86.75779724121094, "logps_train/policy_2_2": -93.5048828125, "logps_train/policy_2_w": -133.996337890625, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.015949010848999, "rewards_train/1-l": -1.4788553714752197, "rewards_train/1-w": 2.3728532791137695, "rewards_train/2-2": 1.7268556356430054, "rewards_train/2-w": 1.417358636856079, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8517086505889893, "rewards_train/margins_1": 1.3569042682647705, "rewards_train/margins_2": 0.30949699878692627, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -222.21507263183594, "logps_train/policy_1_l": -163.00949096679688, "logps_train/policy_1_w": -141.66961669921875, "logps_train/policy_2_2": -183.93063354492188, "logps_train/policy_2_w": -172.6743621826172, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.7925561666488647, "rewards_train/1-l": -1.5314185619354248, "rewards_train/1-w": 3.246711254119873, "rewards_train/2-2": 2.7827184200286865, "rewards_train/2-w": 2.6286582946777344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.778129816055298, "rewards_train/margins_1": 1.4541550874710083, "rewards_train/margins_2": 0.15406012535095215, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -225.9231414794922, "logps_train/policy_1_l": -126.72821044921875, "logps_train/policy_1_w": -145.01943969726562, "logps_train/policy_2_2": -195.2276611328125, "logps_train/policy_2_w": -179.89572143554688, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.8209670186042786, "rewards_train/1-l": -0.47406521439552307, "rewards_train/1-w": 2.3121190071105957, "rewards_train/2-2": 1.8483279943466187, "rewards_train/2-w": 1.2323031425476074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7861842215061188, "rewards_train/margins_1": 1.4911519885063171, "rewards_train/margins_2": 0.6160248517990112, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -166.68069458007812, "logps_train/policy_1_l": -129.2008819580078, "logps_train/policy_1_w": -133.49859619140625, "logps_train/policy_2_2": -152.61167907714844, "logps_train/policy_2_w": -151.80892944335938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.721384048461914, "rewards_train/1-l": -1.0874710083007812, "rewards_train/1-w": 2.102875232696533, "rewards_train/2-2": 2.1353163719177246, "rewards_train/2-w": 1.4780910015106201, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1903462409973145, "rewards_train/margins_1": 0.38149118423461914, "rewards_train/margins_2": 0.6572253704071045, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -287.8228454589844, "logps_train/policy_1_l": -200.07473754882812, "logps_train/policy_1_w": -156.2890625, "logps_train/policy_2_2": -228.61776733398438, "logps_train/policy_2_w": -192.26406860351562, "logps_train/ref_1_2": -300.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.2927155494689941, "rewards_train/1-l": -1.3246612548828125, "rewards_train/1-w": 1.5007818937301636, "rewards_train/2-2": 2.6382226943969727, "rewards_train/2-w": 0.8235934972763062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.825443148612976, "rewards_train/margins_1": 0.20806634426116943, "rewards_train/margins_2": 1.8146291971206665, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -112.89488983154297, "logps_train/policy_1_l": -75.939697265625, "logps_train/policy_1_w": -80.40504455566406, "logps_train/policy_2_2": -94.61544036865234, "logps_train/policy_2_w": -95.38899993896484, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 0.6144171953201294, "rewards_train/1-l": -0.5869386196136475, "rewards_train/1-w": 1.0884015560150146, "rewards_train/2-2": 0.7501745820045471, "rewards_train/2-w": 0.5743804574012756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.675340175628662, "rewards_train/margins_1": 0.47398436069488525, "rewards_train/margins_2": 0.17579412460327148, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -191.6970672607422, "logps_train/policy_1_l": -189.51075744628906, "logps_train/policy_1_w": -136.95166015625, "logps_train/policy_2_2": -161.17442321777344, "logps_train/policy_2_w": -172.0355682373047, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.314669132232666, "rewards_train/1-l": -1.1971696615219116, "rewards_train/1-w": 1.3657718896865845, "rewards_train/2-2": 1.9794330596923828, "rewards_train/2-w": 0.5558186769485474, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.562941551208496, "rewards_train/margins_1": 0.05110275745391846, "rewards_train/margins_2": 1.4236143827438354, "step": 75 }, { "epoch": 0.23, "learning_rate": 4.9463912307487605e-06, "loss": 0.89, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -223.46871948242188, "logps_train/policy_1_l": -177.50933837890625, "logps_train/policy_1_w": -130.1904296875, "logps_train/policy_2_2": -172.79315185546875, "logps_train/policy_2_w": -188.06227111816406, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.559377670288086, "rewards_train/1-l": -1.816949725151062, "rewards_train/1-w": 2.2325196266174316, "rewards_train/2-2": 2.695685386657715, "rewards_train/2-w": 1.3500220775604248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.049469351768494, "rewards_train/margins_1": 0.6731419563293457, "rewards_train/margins_2": 1.34566330909729, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -214.88294982910156, "logps_train/policy_1_l": -144.46913146972656, "logps_train/policy_1_w": -73.63607788085938, "logps_train/policy_2_2": -168.2078857421875, "logps_train/policy_2_w": -95.47380065917969, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.4839702248573303, "rewards_train/1-l": -1.8381246328353882, "rewards_train/1-w": 1.4981105327606201, "rewards_train/2-2": 1.9984982013702393, "rewards_train/2-w": 1.502619743347168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3362351655960083, "rewards_train/margins_1": 1.0141403079032898, "rewards_train/margins_2": 0.4958784580230713, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -100.95663452148438, "logps_train/policy_1_l": -133.66372680664062, "logps_train/policy_1_w": -117.80068969726562, "logps_train/policy_2_2": -84.11878967285156, "logps_train/policy_2_w": -151.22186279296875, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.6793367266654968, "rewards_train/1-l": -1.235903263092041, "rewards_train/1-w": 0.9328217506408691, "rewards_train/2-2": 0.9201518893241882, "rewards_train/2-w": -0.23390556871891022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.16872501373291, "rewards_train/margins_1": 0.2534850239753723, "rewards_train/margins_2": 1.1540574580430984, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -148.55101013183594, "logps_train/policy_1_l": -177.1984100341797, "logps_train/policy_1_w": -114.70793151855469, "logps_train/policy_2_2": -118.82794189453125, "logps_train/policy_2_w": -141.99227905273438, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.1683359146118164, "rewards_train/1-l": -1.26241934299469, "rewards_train/1-w": 1.144831657409668, "rewards_train/2-2": 1.3496274948120117, "rewards_train/2-w": 0.6476482152938843, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.407251000404358, "rewards_train/margins_1": -0.023504257202148438, "rewards_train/margins_2": 0.7019792795181274, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -148.76754760742188, "logps_train/policy_1_l": -177.8087158203125, "logps_train/policy_1_w": -163.49581909179688, "logps_train/policy_2_2": -127.88081359863281, "logps_train/policy_2_w": -188.6029052734375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.0826196670532227, "rewards_train/1-l": -1.3855581283569336, "rewards_train/1-w": 2.822683334350586, "rewards_train/2-2": 1.383793830871582, "rewards_train/2-w": 1.9928345680236816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.2082414627075195, "rewards_train/margins_1": 1.7400636672973633, "rewards_train/margins_2": -0.6090407371520996, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -154.55287170410156, "logps_train/policy_1_l": -147.86978149414062, "logps_train/policy_1_w": -161.09219360351562, "logps_train/policy_2_2": -122.62223815917969, "logps_train/policy_2_w": -200.05859375, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.9978375434875488, "rewards_train/1-l": -0.43326741456985474, "rewards_train/1-w": 2.1532816886901855, "rewards_train/2-2": 1.4159013032913208, "rewards_train/2-w": 1.1847643852233887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.5865491032600403, "rewards_train/margins_1": 1.1554441452026367, "rewards_train/margins_2": 0.23113691806793213, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -230.15904235839844, "logps_train/policy_1_l": -225.69882202148438, "logps_train/policy_1_w": -171.5136260986328, "logps_train/policy_2_2": -199.6229248046875, "logps_train/policy_2_w": -210.67015075683594, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.515345573425293, "rewards_train/1-l": -1.4550373554229736, "rewards_train/1-w": 2.7751994132995605, "rewards_train/2-2": 2.928331136703491, "rewards_train/2-w": 1.8720479011535645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.230236768722534, "rewards_train/margins_1": 0.2598538398742676, "rewards_train/margins_2": 1.0562832355499268, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -249.2085418701172, "logps_train/policy_1_l": -240.82864379882812, "logps_train/policy_1_w": -182.79287719726562, "logps_train/policy_2_2": -209.59451293945312, "logps_train/policy_2_w": -215.81626892089844, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 2.115863800048828, "rewards_train/1-l": -2.8844263553619385, "rewards_train/1-w": 1.6941492557525635, "rewards_train/2-2": 3.2097885608673096, "rewards_train/2-w": 1.030092716217041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.578575611114502, "rewards_train/margins_1": -0.42171454429626465, "rewards_train/margins_2": 2.1796958446502686, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -144.61363220214844, "logps_train/policy_1_l": -124.45211791992188, "logps_train/policy_1_w": -146.57626342773438, "logps_train/policy_2_2": -119.2728271484375, "logps_train/policy_2_w": -177.9121551513672, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.9554335474967957, "rewards_train/1-l": -1.2694298028945923, "rewards_train/1-w": 1.8337790966033936, "rewards_train/2-2": 1.4582645893096924, "rewards_train/2-w": 1.049018383026123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.103208899497986, "rewards_train/margins_1": 0.8783455491065979, "rewards_train/margins_2": 0.40924620628356934, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -164.12063598632812, "logps_train/policy_1_l": -134.97000122070312, "logps_train/policy_1_w": -154.5159149169922, "logps_train/policy_2_2": -132.25787353515625, "logps_train/policy_2_w": -187.90017700195312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.6723116636276245, "rewards_train/1-l": -1.0032508373260498, "rewards_train/1-w": 2.211688995361328, "rewards_train/2-2": 2.1062440872192383, "rewards_train/2-w": 1.416231632232666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.214939832687378, "rewards_train/margins_1": 0.5393773317337036, "rewards_train/margins_2": 0.6900124549865723, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -175.18716430664062, "logps_train/policy_1_l": -217.6833038330078, "logps_train/policy_1_w": -177.46336364746094, "logps_train/policy_2_2": -151.68710327148438, "logps_train/policy_2_w": -214.01034545898438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.6164389848709106, "rewards_train/1-l": -2.5729212760925293, "rewards_train/1-w": 1.5853047370910645, "rewards_train/2-2": 1.1176183223724365, "rewards_train/2-w": 0.4536546766757965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.158226013183594, "rewards_train/margins_1": 0.9688657522201538, "rewards_train/margins_2": 0.66396364569664, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -150.86105346679688, "logps_train/policy_1_l": -190.8324737548828, "logps_train/policy_1_w": -116.46624755859375, "logps_train/policy_2_2": -124.76316833496094, "logps_train/policy_2_w": -143.0196990966797, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0263948440551758, "rewards_train/1-l": -1.3191847801208496, "rewards_train/1-w": 1.6440001726150513, "rewards_train/2-2": 2.0236830711364746, "rewards_train/2-w": 1.2808427810668945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.963184952735901, "rewards_train/margins_1": 0.6176053285598755, "rewards_train/margins_2": 0.7428402900695801, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -201.69955444335938, "logps_train/policy_1_l": -148.103759765625, "logps_train/policy_1_w": -90.16822814941406, "logps_train/policy_2_2": -168.49029541015625, "logps_train/policy_2_w": -118.23268127441406, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.726919949054718, "rewards_train/1-l": -1.0836186408996582, "rewards_train/1-w": 1.7331775426864624, "rewards_train/2-2": 1.6915967464447021, "rewards_train/2-w": 1.1314198970794678, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8167961835861206, "rewards_train/margins_1": 1.0062575936317444, "rewards_train/margins_2": 0.5601768493652344, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -171.26907348632812, "logps_train/policy_1_l": -181.51519775390625, "logps_train/policy_1_w": -163.4305877685547, "logps_train/policy_2_2": -137.86602783203125, "logps_train/policy_2_w": -195.37310791015625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.073873519897461, "rewards_train/1-l": -2.4983949661254883, "rewards_train/1-w": 2.213191270828247, "rewards_train/2-2": 1.787030577659607, "rewards_train/2-w": 1.4595646858215332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.711586236953735, "rewards_train/margins_1": 1.1393177509307861, "rewards_train/margins_2": 0.32746589183807373, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -131.4571990966797, "logps_train/policy_1_l": -129.75148010253906, "logps_train/policy_1_w": -103.61201477050781, "logps_train/policy_2_2": -99.82564544677734, "logps_train/policy_2_w": -138.82003784179688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.6355308890342712, "rewards_train/1-l": -1.0876483917236328, "rewards_train/1-w": 1.4716109037399292, "rewards_train/2-2": 1.0135289430618286, "rewards_train/2-w": 0.6426054239273071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.559259295463562, "rewards_train/margins_1": 0.836080014705658, "rewards_train/margins_2": 0.3709235191345215, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -113.15955352783203, "logps_train/policy_1_l": -82.90579986572266, "logps_train/policy_1_w": -91.21690368652344, "logps_train/policy_2_2": -99.66202545166016, "logps_train/policy_2_w": -115.7081527709961, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.994200587272644, "rewards_train/1-l": -1.0940954685211182, "rewards_train/1-w": 1.7396376132965088, "rewards_train/2-2": 1.42598557472229, "rewards_train/2-w": 1.1030124425888062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.833733081817627, "rewards_train/margins_1": 0.7454370260238647, "rewards_train/margins_2": 0.3229731321334839, "step": 77 }, { "epoch": 0.23, "learning_rate": 4.941184660417034e-06, "loss": 0.873, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -99.41553497314453, "logps_train/policy_1_l": -112.27897644042969, "logps_train/policy_1_w": -72.89013671875, "logps_train/policy_2_2": -74.89768981933594, "logps_train/policy_2_w": -101.84904479980469, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.8537593483924866, "rewards_train/1-l": -0.7196944355964661, "rewards_train/1-w": 1.3461428880691528, "rewards_train/2-2": 1.3399181365966797, "rewards_train/2-w": 0.7096267938613892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.065837323665619, "rewards_train/margins_1": 0.49238353967666626, "rewards_train/margins_2": 0.6302913427352905, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -193.4948272705078, "logps_train/policy_1_l": -131.7937469482422, "logps_train/policy_1_w": -171.60409545898438, "logps_train/policy_2_2": -163.83688354492188, "logps_train/policy_2_w": -196.4937744140625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 0.9171925187110901, "rewards_train/1-l": -0.7212927937507629, "rewards_train/1-w": 1.9895908832550049, "rewards_train/2-2": 1.6262719631195068, "rewards_train/2-w": 1.2584364414215088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.710883677005768, "rewards_train/margins_1": 1.0723983645439148, "rewards_train/margins_2": 0.36783552169799805, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -100.07080841064453, "logps_train/policy_1_l": -95.39900970458984, "logps_train/policy_1_w": -101.35315704345703, "logps_train/policy_2_2": -84.79763793945312, "logps_train/policy_2_w": -123.32840728759766, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.5566396117210388, "rewards_train/1-l": -0.46021369099617004, "rewards_train/1-w": 1.2217152118682861, "rewards_train/2-2": 1.1344455480575562, "rewards_train/2-w": 1.081221342086792, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.6819289028644562, "rewards_train/margins_1": 0.6650756001472473, "rewards_train/margins_2": 0.05322420597076416, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -166.83529663085938, "logps_train/policy_1_l": -145.05709838867188, "logps_train/policy_1_w": -122.49219512939453, "logps_train/policy_2_2": -144.04586791992188, "logps_train/policy_2_w": -148.61383056640625, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.0766258239746094, "rewards_train/1-l": -1.1689902544021606, "rewards_train/1-w": 1.1976556777954102, "rewards_train/2-2": 1.6094765663146973, "rewards_train/2-w": 0.5253356099128723, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.366645932197571, "rewards_train/margins_1": 0.12102985382080078, "rewards_train/margins_2": 1.084140956401825, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -113.84459686279297, "logps_train/policy_1_l": -169.49435424804688, "logps_train/policy_1_w": -132.76556396484375, "logps_train/policy_2_2": -97.66778564453125, "logps_train/policy_2_w": -179.029541015625, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.7194466590881348, "rewards_train/1-l": -1.7838112115859985, "rewards_train/1-w": 2.189457893371582, "rewards_train/2-2": 1.1507996320724487, "rewards_train/2-w": 1.3798587322235107, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.9732691049575806, "rewards_train/margins_1": 1.4700112342834473, "rewards_train/margins_2": -0.229059100151062, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -155.31980895996094, "logps_train/policy_1_l": -198.3227081298828, "logps_train/policy_1_w": -177.98126220703125, "logps_train/policy_2_2": -114.50619506835938, "logps_train/policy_2_w": -211.39813232421875, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.985987663269043, "rewards_train/1-l": -1.129146933555603, "rewards_train/1-w": 1.5112476348876953, "rewards_train/2-2": 1.9071927070617676, "rewards_train/2-w": 0.7164367437362671, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6403945684432983, "rewards_train/margins_1": 0.5252599716186523, "rewards_train/margins_2": 1.1907559633255005, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -76.39273071289062, "logps_train/policy_1_l": -71.31779479980469, "logps_train/policy_1_w": -75.68852233886719, "logps_train/policy_2_2": -62.953102111816406, "logps_train/policy_2_w": -98.65437316894531, "logps_train/ref_1_2": -80.5, "logps_train/ref_1_l": -63.25, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 0.43142974376678467, "rewards_train/1-l": -0.7956464290618896, "rewards_train/1-w": 1.6092734336853027, "rewards_train/2-2": 0.7558614015579224, "rewards_train/2-w": 1.075188159942627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4049198627471924, "rewards_train/margins_1": 1.177843689918518, "rewards_train/margins_2": -0.3193267583847046, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -58.295799255371094, "logps_train/policy_1_l": -63.70187759399414, "logps_train/policy_1_w": -75.92815399169922, "logps_train/policy_2_2": -40.68105697631836, "logps_train/policy_2_w": -99.8795166015625, "logps_train/ref_1_2": -66.0, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -52.25, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.7657327651977539, "rewards_train/1-l": -0.6584692597389221, "rewards_train/1-w": 2.1944894790649414, "rewards_train/2-2": 1.1592379808425903, "rewards_train/2-w": 1.6222047805786133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.8529587388038635, "rewards_train/margins_1": 1.4287567138671875, "rewards_train/margins_2": -0.46296679973602295, "step": 78 }, { "epoch": 0.24, "logps_train/policy_1_2": -142.87942504882812, "logps_train/policy_1_l": -154.218017578125, "logps_train/policy_1_w": -91.99114227294922, "logps_train/policy_2_2": -123.90104675292969, "logps_train/policy_2_w": -117.12709045410156, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.8964332938194275, "rewards_train/1-l": -1.817895531654358, "rewards_train/1-w": 1.2727608680725098, "rewards_train/2-2": 1.405989646911621, "rewards_train/2-w": 0.6654154658317566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0906563997268677, "rewards_train/margins_1": 0.3763275742530823, "rewards_train/margins_2": 0.7405741810798645, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -163.94503784179688, "logps_train/policy_1_l": -155.2101593017578, "logps_train/policy_1_w": -111.37362670898438, "logps_train/policy_2_2": -122.74615478515625, "logps_train/policy_2_w": -152.74618530273438, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.8851833939552307, "rewards_train/1-l": -0.6581249237060547, "rewards_train/1-w": 1.1876370906829834, "rewards_train/2-2": 1.655072808265686, "rewards_train/2-w": 0.578506350517273, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.845762014389038, "rewards_train/margins_1": 0.3024536967277527, "rewards_train/margins_2": 1.076566457748413, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -306.3975830078125, "logps_train/policy_1_l": -265.1846008300781, "logps_train/policy_1_w": -160.21884155273438, "logps_train/policy_2_2": -239.39334106445312, "logps_train/policy_2_w": -202.24349975585938, "logps_train/ref_1_2": -324.0, "logps_train/ref_1_l": -243.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -272.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.8196194171905518, "rewards_train/1-l": -2.218656539916992, "rewards_train/1-w": 2.256241798400879, "rewards_train/2-2": 3.3669180870056152, "rewards_train/2-w": 1.2587558031082153, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.474898338317871, "rewards_train/margins_1": 0.43662238121032715, "rewards_train/margins_2": 2.1081622838974, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -115.70149230957031, "logps_train/policy_1_l": -119.3228988647461, "logps_train/policy_1_w": -120.92533874511719, "logps_train/policy_2_2": -95.85952758789062, "logps_train/policy_2_w": -147.0660858154297, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2829750776290894, "rewards_train/1-l": -0.9412744045257568, "rewards_train/1-w": 0.9996532797813416, "rewards_train/2-2": 1.512484073638916, "rewards_train/2-w": 0.5043293833732605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9409276843070984, "rewards_train/margins_1": -0.2833217978477478, "rewards_train/margins_2": 1.0081546902656555, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -89.70062255859375, "logps_train/policy_1_l": -105.42182922363281, "logps_train/policy_1_w": -64.35983276367188, "logps_train/policy_2_2": -71.19564056396484, "logps_train/policy_2_w": -92.40909576416016, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 1.014312982559204, "rewards_train/1-l": -0.7840285301208496, "rewards_train/1-w": 1.1892608404159546, "rewards_train/2-2": 1.3640789985656738, "rewards_train/2-w": 0.7941974401473999, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9732893705368042, "rewards_train/margins_1": 0.1749478578567505, "rewards_train/margins_2": 0.5698815584182739, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -158.52841186523438, "logps_train/policy_1_l": -161.3675994873047, "logps_train/policy_1_w": -173.27102661132812, "logps_train/policy_2_2": -132.37200927734375, "logps_train/policy_2_w": -203.12745666503906, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.6948156356811523, "rewards_train/1-l": -1.437004566192627, "rewards_train/1-w": 2.5227017402648926, "rewards_train/2-2": 2.264362096786499, "rewards_train/2-w": 1.4544419050216675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9597063064575195, "rewards_train/margins_1": 0.8278861045837402, "rewards_train/margins_2": 0.8099201917648315, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -206.91787719726562, "logps_train/policy_1_l": -154.66256713867188, "logps_train/policy_1_w": -131.338134765625, "logps_train/policy_2_2": -153.91111755371094, "logps_train/policy_2_w": -170.5863494873047, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.001180648803711, "rewards_train/1-l": -1.900241494178772, "rewards_train/1-w": 2.0716545581817627, "rewards_train/2-2": 1.9854512214660645, "rewards_train/2-w": 0.8210529088973999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9718960523605347, "rewards_train/margins_1": 1.0704739093780518, "rewards_train/margins_2": 1.1643983125686646, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -193.60513305664062, "logps_train/policy_1_l": -261.89227294921875, "logps_train/policy_1_w": -177.10150146484375, "logps_train/policy_2_2": -158.09466552734375, "logps_train/policy_2_w": -215.37713623046875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.0043299198150635, "rewards_train/1-l": -2.535320997238159, "rewards_train/1-w": 2.441413164138794, "rewards_train/2-2": 1.977057933807373, "rewards_train/2-w": 1.6208791732788086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.976734161376953, "rewards_train/margins_1": 1.4370832443237305, "rewards_train/margins_2": 0.35617876052856445, "step": 79 }, { "epoch": 0.24, "learning_rate": 4.935739834765994e-06, "loss": 0.9433, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -190.63046264648438, "logps_train/policy_1_l": -153.715087890625, "logps_train/policy_1_w": -134.86367797851562, "logps_train/policy_2_2": -157.0087432861328, "logps_train/policy_2_w": -164.6997833251953, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.3900786638259888, "rewards_train/1-l": -1.1652582883834839, "rewards_train/1-w": 1.385507345199585, "rewards_train/2-2": 1.9944387674331665, "rewards_train/2-w": 0.7518964409828186, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.550765633583069, "rewards_train/margins_1": -0.004571318626403809, "rewards_train/margins_2": 1.242542326450348, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -160.5473175048828, "logps_train/policy_1_l": -141.19947814941406, "logps_train/policy_1_w": -146.99647521972656, "logps_train/policy_2_2": -139.7346954345703, "logps_train/policy_2_w": -181.0460662841797, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5765175819396973, "rewards_train/1-l": -0.8965098261833191, "rewards_train/1-w": 1.9456651210784912, "rewards_train/2-2": 2.032780647277832, "rewards_train/2-w": 1.1047687530517578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8421749472618103, "rewards_train/margins_1": 0.36914753913879395, "rewards_train/margins_2": 0.9280118942260742, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -131.869140625, "logps_train/policy_1_l": -174.26324462890625, "logps_train/policy_1_w": -136.22035217285156, "logps_train/policy_2_2": -101.75869750976562, "logps_train/policy_2_w": -165.2295379638672, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.8380862474441528, "rewards_train/1-l": -1.5212950706481934, "rewards_train/1-w": 1.8974477052688599, "rewards_train/2-2": 1.3131932020187378, "rewards_train/2-w": 1.3484326601028442, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4187427759170532, "rewards_train/margins_1": 1.059361457824707, "rewards_train/margins_2": -0.035239458084106445, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -138.98904418945312, "logps_train/policy_1_l": -84.6019287109375, "logps_train/policy_1_w": -98.04866027832031, "logps_train/policy_2_2": -125.44574737548828, "logps_train/policy_2_w": -117.69255828857422, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.1159400939941406, "rewards_train/1-l": -0.637340784072876, "rewards_train/1-w": 1.118620753288269, "rewards_train/2-2": 1.37808096408844, "rewards_train/2-w": 0.5182438492774963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.755961537361145, "rewards_train/margins_1": 0.002680659294128418, "rewards_train/margins_2": 0.8598371148109436, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -221.12994384765625, "logps_train/policy_1_l": -178.08087158203125, "logps_train/policy_1_w": -137.34677124023438, "logps_train/policy_2_2": -196.1710662841797, "logps_train/policy_2_w": -169.3293914794922, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.140129804611206, "rewards_train/1-l": -2.2807440757751465, "rewards_train/1-w": 2.021573066711426, "rewards_train/2-2": 1.732893943786621, "rewards_train/2-w": 1.2873728275299072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.302317142486572, "rewards_train/margins_1": 0.8814432621002197, "rewards_train/margins_2": 0.44552111625671387, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -189.58633422851562, "logps_train/policy_1_l": -181.50088500976562, "logps_train/policy_1_w": -137.5318145751953, "logps_train/policy_2_2": -151.28189086914062, "logps_train/policy_2_w": -167.9453125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.28745973110198975, "rewards_train/1-l": -1.7713768482208252, "rewards_train/1-w": 2.7522873878479004, "rewards_train/2-2": 1.3815759420394897, "rewards_train/2-w": 1.6453125476837158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.523664236068726, "rewards_train/margins_1": 2.4648276567459106, "rewards_train/margins_2": -0.2637366056442261, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -201.98809814453125, "logps_train/policy_1_l": -270.2771911621094, "logps_train/policy_1_w": -153.26730346679688, "logps_train/policy_2_2": -171.79559326171875, "logps_train/policy_2_w": -204.45401000976562, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.0261890888214111, "rewards_train/1-l": -3.577329397201538, "rewards_train/1-w": 2.331082820892334, "rewards_train/2-2": 1.9282543659210205, "rewards_train/2-w": 1.4014736413955688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.908412218093872, "rewards_train/margins_1": 1.3048937320709229, "rewards_train/margins_2": 0.5267807245254517, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -96.20164489746094, "logps_train/policy_1_l": -53.72334289550781, "logps_train/policy_1_w": -59.53534698486328, "logps_train/policy_2_2": -67.2637710571289, "logps_train/policy_2_w": -85.85638427734375, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -47.75, "logps_train/ref_1_w": -70.0, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 1.0603041648864746, "rewards_train/1-l": -0.6011427640914917, "rewards_train/1-w": 1.0501765012741089, "rewards_train/2-2": 1.5337791442871094, "rewards_train/2-w": 0.37295544147491455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.6513192653656006, "rewards_train/margins_1": -0.010127663612365723, "rewards_train/margins_2": 1.1608237028121948, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -82.74956512451172, "logps_train/policy_1_l": -134.0377197265625, "logps_train/policy_1_w": -107.9920654296875, "logps_train/policy_2_2": -57.95022201538086, "logps_train/policy_2_w": -143.88003540039062, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.4359810948371887, "rewards_train/1-l": -1.2084590196609497, "rewards_train/1-w": 0.9672002792358398, "rewards_train/2-2": 0.9331026673316956, "rewards_train/2-w": -0.21417632699012756, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1756592988967896, "rewards_train/margins_1": 0.5312191843986511, "rewards_train/margins_2": 1.1472789943218231, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -87.01485443115234, "logps_train/policy_1_l": -171.23561096191406, "logps_train/policy_1_w": -102.94801330566406, "logps_train/policy_2_2": -76.15506744384766, "logps_train/policy_2_w": -131.23484802246094, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.5578900575637817, "rewards_train/1-l": -1.5399680137634277, "rewards_train/1-w": 1.0961167812347412, "rewards_train/2-2": 0.918086588382721, "rewards_train/2-w": 0.4601086378097534, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.636084794998169, "rewards_train/margins_1": 0.5382267236709595, "rewards_train/margins_2": 0.45797795057296753, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -222.047607421875, "logps_train/policy_1_l": -205.01754760742188, "logps_train/policy_1_w": -146.67205810546875, "logps_train/policy_2_2": -172.7986602783203, "logps_train/policy_2_w": -191.27989196777344, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.213989019393921, "rewards_train/1-l": -2.24550461769104, "rewards_train/1-w": 1.8281077146530151, "rewards_train/2-2": 2.9513840675354004, "rewards_train/2-w": 0.714198112487793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.073612332344055, "rewards_train/margins_1": 0.6141186952590942, "rewards_train/margins_2": 2.2371859550476074, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -159.19143676757812, "logps_train/policy_1_l": -131.64556884765625, "logps_train/policy_1_w": -110.80825805664062, "logps_train/policy_2_2": -139.83447265625, "logps_train/policy_2_w": -127.60315704345703, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.018357276916504, "rewards_train/1-l": -0.7907284498214722, "rewards_train/1-w": 0.9726898074150085, "rewards_train/2-2": 1.4462406635284424, "rewards_train/2-w": 0.5521837472915649, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7634182572364807, "rewards_train/margins_1": -0.04566746950149536, "rewards_train/margins_2": 0.8940569162368774, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -107.49424743652344, "logps_train/policy_1_l": -158.99954223632812, "logps_train/policy_1_w": -117.96826171875, "logps_train/policy_2_2": -83.52120208740234, "logps_train/policy_2_w": -146.31814575195312, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.1951069831848145, "rewards_train/1-l": -2.4211463928222656, "rewards_train/1-w": 1.7670416831970215, "rewards_train/2-2": 1.6605753898620605, "rewards_train/2-w": 1.0680873394012451, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.188188076019287, "rewards_train/margins_1": 0.571934700012207, "rewards_train/margins_2": 0.5924880504608154, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -141.68797302246094, "logps_train/policy_1_l": -141.0880889892578, "logps_train/policy_1_w": -125.56002807617188, "logps_train/policy_2_2": -109.64236450195312, "logps_train/policy_2_w": -167.6326141357422, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.3085463047027588, "rewards_train/1-l": -1.089277744293213, "rewards_train/1-w": 2.39477801322937, "rewards_train/2-2": 1.7927948236465454, "rewards_train/2-w": 0.7218946218490601, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.484055757522583, "rewards_train/margins_1": 1.0862317085266113, "rewards_train/margins_2": 1.0709002017974854, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -171.51776123046875, "logps_train/policy_1_l": -139.9654998779297, "logps_train/policy_1_w": -204.15203857421875, "logps_train/policy_2_2": -141.07244873046875, "logps_train/policy_2_w": -229.76461791992188, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.289629340171814, "rewards_train/1-l": -1.0320488214492798, "rewards_train/1-w": 2.2191710472106934, "rewards_train/2-2": 1.9404098987579346, "rewards_train/2-w": 1.3133823871612549, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.251219868659973, "rewards_train/margins_1": 0.9295417070388794, "rewards_train/margins_2": 0.6270275115966797, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -199.43450927734375, "logps_train/policy_1_l": -237.2446746826172, "logps_train/policy_1_w": -116.55232238769531, "logps_train/policy_2_2": -159.8388671875, "logps_train/policy_2_w": -157.60096740722656, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.293268084526062, "rewards_train/1-l": -2.1909708976745605, "rewards_train/1-w": 1.7236744165420532, "rewards_train/2-2": 2.121581554412842, "rewards_train/2-w": 1.1487901210784912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9146453142166138, "rewards_train/margins_1": 0.4304063320159912, "rewards_train/margins_2": 0.9727914333343506, "step": 81 }, { "epoch": 0.25, "learning_rate": 4.930057285201028e-06, "loss": 0.9031, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -162.75502014160156, "logps_train/policy_1_l": -184.49929809570312, "logps_train/policy_1_w": -196.9781036376953, "logps_train/policy_2_2": -133.97056579589844, "logps_train/policy_2_w": -227.18002319335938, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.7077012062072754, "rewards_train/1-l": -1.7659459114074707, "rewards_train/1-w": 2.0392990112304688, "rewards_train/2-2": 2.3451309204101562, "rewards_train/2-w": 0.8413735628128052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8052449226379395, "rewards_train/margins_1": 0.33159780502319336, "rewards_train/margins_2": 1.503757357597351, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -118.1424560546875, "logps_train/policy_1_l": -100.01470184326172, "logps_train/policy_1_w": -124.86029052734375, "logps_train/policy_2_2": -99.09954833984375, "logps_train/policy_2_w": -147.008056640625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.7685666084289551, "rewards_train/1-l": -0.44756433367729187, "rewards_train/1-w": 1.400298833847046, "rewards_train/2-2": 1.2974672317504883, "rewards_train/2-w": 0.650757908821106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8478631675243378, "rewards_train/margins_1": 0.6317322254180908, "rewards_train/margins_2": 0.6467093229293823, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -160.321533203125, "logps_train/policy_1_l": -159.15480041503906, "logps_train/policy_1_w": -125.97062683105469, "logps_train/policy_2_2": -132.56524658203125, "logps_train/policy_2_w": -150.4752197265625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.4756594896316528, "rewards_train/1-l": -1.1639173030853271, "rewards_train/1-w": 1.785750150680542, "rewards_train/2-2": 1.8817567825317383, "rewards_train/2-w": 1.2306030988693237, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.949667453765869, "rewards_train/margins_1": 0.31009066104888916, "rewards_train/margins_2": 0.6511536836624146, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -79.39143371582031, "logps_train/policy_1_l": -90.4865951538086, "logps_train/policy_1_w": -91.01293182373047, "logps_train/policy_2_2": -62.872188568115234, "logps_train/policy_2_w": -111.80341339111328, "logps_train/ref_1_2": -83.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.4280444085597992, "rewards_train/1-l": -0.918581485748291, "rewards_train/1-w": 0.9369878768920898, "rewards_train/2-2": 0.7385625839233398, "rewards_train/2-w": 0.4345025420188904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.8555693626403809, "rewards_train/margins_1": 0.5089434683322906, "rewards_train/margins_2": 0.30406004190444946, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -181.19708251953125, "logps_train/policy_1_l": -220.62115478515625, "logps_train/policy_1_w": -140.38600158691406, "logps_train/policy_2_2": -152.88543701171875, "logps_train/policy_2_w": -171.20223999023438, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.241228699684143, "rewards_train/1-l": -2.203913450241089, "rewards_train/1-w": 1.5352282524108887, "rewards_train/2-2": 2.0395803451538086, "rewards_train/2-w": 0.7891497015953064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7391417026519775, "rewards_train/margins_1": 0.2939995527267456, "rewards_train/margins_2": 1.2504306435585022, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -171.81979370117188, "logps_train/policy_1_l": -202.4169921875, "logps_train/policy_1_w": -111.38603973388672, "logps_train/policy_2_2": -138.48623657226562, "logps_train/policy_2_w": -138.05368041992188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.3742711544036865, "rewards_train/1-l": -2.021190881729126, "rewards_train/1-w": 1.3199901580810547, "rewards_train/2-2": 2.3091888427734375, "rewards_train/2-w": 0.6391621828079224, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3411810398101807, "rewards_train/margins_1": -0.054280996322631836, "rewards_train/margins_2": 1.6700266599655151, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -74.72679138183594, "logps_train/policy_1_l": -120.44760131835938, "logps_train/policy_1_w": -89.53530883789062, "logps_train/policy_2_2": -56.907470703125, "logps_train/policy_2_w": -109.16007995605469, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 1.0616958141326904, "rewards_train/1-l": -1.500229001045227, "rewards_train/1-w": 1.595590353012085, "rewards_train/2-2": 1.3342530727386475, "rewards_train/2-w": 1.2482504844665527, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.095819354057312, "rewards_train/margins_1": 0.5338945388793945, "rewards_train/margins_2": 0.08600258827209473, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -173.14120483398438, "logps_train/policy_1_l": -132.1646270751953, "logps_train/policy_1_w": -98.37557220458984, "logps_train/policy_2_2": -130.94654846191406, "logps_train/policy_2_w": -124.16618347167969, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.1843183040618896, "rewards_train/1-l": -0.5504476428031921, "rewards_train/1-w": 1.5163490772247314, "rewards_train/2-2": 1.901829481124878, "rewards_train/2-w": 0.9927564859390259, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.0667967200279236, "rewards_train/margins_1": 0.3320307731628418, "rewards_train/margins_2": 0.909072995185852, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -186.97811889648438, "logps_train/policy_1_l": -176.87503051757812, "logps_train/policy_1_w": -195.55398559570312, "logps_train/policy_2_2": -161.86082458496094, "logps_train/policy_2_w": -222.20091247558594, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 1.871719241142273, "rewards_train/1-l": -1.1423866748809814, "rewards_train/1-w": 2.8438191413879395, "rewards_train/2-2": 2.184229850769043, "rewards_train/2-w": 2.2777609825134277, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.986205816268921, "rewards_train/margins_1": 0.9720999002456665, "rewards_train/margins_2": -0.09353113174438477, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -183.57110595703125, "logps_train/policy_1_l": -201.311767578125, "logps_train/policy_1_w": -142.91683959960938, "logps_train/policy_2_2": -156.2381591796875, "logps_train/policy_2_w": -184.10255432128906, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.236639142036438, "rewards_train/1-l": -2.4827396869659424, "rewards_train/1-w": 1.373940348625183, "rewards_train/2-2": 2.054309844970703, "rewards_train/2-w": 0.8194327354431152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8566800355911255, "rewards_train/margins_1": 0.13730120658874512, "rewards_train/margins_2": 1.234877109527588, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -141.42495727539062, "logps_train/policy_1_l": -197.26524353027344, "logps_train/policy_1_w": -161.13299560546875, "logps_train/policy_2_2": -110.78634643554688, "logps_train/policy_2_w": -197.25192260742188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.0321131944656372, "rewards_train/1-l": -1.536679983139038, "rewards_train/1-w": 2.460137367248535, "rewards_train/2-2": 1.5920679569244385, "rewards_train/2-w": 1.5388720035552979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9968173503875732, "rewards_train/margins_1": 1.428024172782898, "rewards_train/margins_2": 0.053195953369140625, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -236.8863983154297, "logps_train/policy_1_l": -143.06690979003906, "logps_train/policy_1_w": -136.01327514648438, "logps_train/policy_2_2": -187.17434692382812, "logps_train/policy_2_w": -173.69525146484375, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.6517415046691895, "rewards_train/1-l": -1.2176281213760376, "rewards_train/1-w": 1.7590248584747314, "rewards_train/2-2": 2.25854229927063, "rewards_train/2-w": 0.8003973364830017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.976652979850769, "rewards_train/margins_1": 1.107283353805542, "rewards_train/margins_2": 1.4581449627876282, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -157.71290588378906, "logps_train/policy_1_l": -207.5485382080078, "logps_train/policy_1_w": -133.53768920898438, "logps_train/policy_2_2": -124.74697875976562, "logps_train/policy_2_w": -171.13050842285156, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.6818348169326782, "rewards_train/1-l": -1.792353868484497, "rewards_train/1-w": 1.8188873529434204, "rewards_train/2-2": 1.5221774578094482, "rewards_train/2-w": 1.0619490146636963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6112412214279175, "rewards_train/margins_1": 1.1370525360107422, "rewards_train/margins_2": 0.46022844314575195, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -90.55030822753906, "logps_train/policy_1_l": -36.521324157714844, "logps_train/policy_1_w": -115.16758728027344, "logps_train/policy_2_2": -71.73542785644531, "logps_train/policy_2_w": -139.49758911132812, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -32.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7637189626693726, "rewards_train/1-l": -0.38904619216918945, "rewards_train/1-w": 2.044178009033203, "rewards_train/2-2": 1.041691780090332, "rewards_train/2-w": 1.2010233402252197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4332242012023926, "rewards_train/margins_1": 1.2804590463638306, "rewards_train/margins_2": -0.1593315601348877, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -123.6277847290039, "logps_train/policy_1_l": -160.38600158691406, "logps_train/policy_1_w": -161.8720703125, "logps_train/policy_2_2": -101.79334259033203, "logps_train/policy_2_w": -194.29165649414062, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.2919094562530518, "rewards_train/1-l": -1.505982518196106, "rewards_train/1-w": 2.340916872024536, "rewards_train/2-2": 1.7003529071807861, "rewards_train/2-w": 1.0489592552185059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.846899390220642, "rewards_train/margins_1": 1.0490074157714844, "rewards_train/margins_2": 0.6513936519622803, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -55.44071578979492, "logps_train/policy_1_l": -30.40721893310547, "logps_train/policy_1_w": -44.911678314208984, "logps_train/policy_2_2": -33.88399887084961, "logps_train/policy_2_w": -64.64950561523438, "logps_train/ref_1_2": -64.5, "logps_train/ref_1_l": -27.25, "logps_train/ref_1_w": -56.25, "logps_train/ref_2_2": -44.5, "logps_train/ref_2_w": -71.5, "rewards_train/1-2": 0.9188189506530762, "rewards_train/1-l": -0.309813529253006, "rewards_train/1-w": 1.1439882516860962, "rewards_train/2-2": 1.056522011756897, "rewards_train/2-w": 0.6936434507369995, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.4538017809391022, "rewards_train/margins_1": 0.22516930103302002, "rewards_train/margins_2": 0.36287856101989746, "step": 83 }, { "epoch": 0.25, "learning_rate": 4.924137566328951e-06, "loss": 0.8831, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -189.5931854248047, "logps_train/policy_1_l": -83.93202209472656, "logps_train/policy_1_w": -74.75975799560547, "logps_train/policy_2_2": -145.21749877929688, "logps_train/policy_2_w": -113.61954498291016, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.682057499885559, "rewards_train/1-l": -0.3105670213699341, "rewards_train/1-w": 1.211493968963623, "rewards_train/2-2": 2.578434705734253, "rewards_train/2-w": 0.6894005537033081, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.5220609903335571, "rewards_train/margins_1": -0.47056353092193604, "rewards_train/margins_2": 1.8890341520309448, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -170.7693634033203, "logps_train/policy_1_l": -149.966552734375, "logps_train/policy_1_w": -133.0230712890625, "logps_train/policy_2_2": -141.8327178955078, "logps_train/policy_2_w": -161.74880981445312, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 2.277751922607422, "rewards_train/1-l": -1.0599358081817627, "rewards_train/1-w": 2.0758168697357178, "rewards_train/2-2": 3.063602924346924, "rewards_train/2-w": 1.4938693046569824, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.1357526779174805, "rewards_train/margins_1": -0.2019350528717041, "rewards_train/margins_2": 1.5697336196899414, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -203.3389892578125, "logps_train/policy_1_l": -185.8059844970703, "logps_train/policy_1_w": -186.26820373535156, "logps_train/policy_2_2": -176.73046875, "logps_train/policy_2_w": -218.85073852539062, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": 1.5879746675491333, "rewards_train/1-l": -1.8032546043395996, "rewards_train/1-w": 2.785679340362549, "rewards_train/2-2": 2.0769543647766113, "rewards_train/2-w": 1.7524266242980957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.588933944702148, "rewards_train/margins_1": 1.1977046728134155, "rewards_train/margins_2": 0.3245277404785156, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -113.0888442993164, "logps_train/policy_1_l": -133.8985595703125, "logps_train/policy_1_w": -62.41318893432617, "logps_train/policy_2_2": -96.32048797607422, "logps_train/policy_2_w": -80.89508819580078, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 1.3196312189102173, "rewards_train/1-l": -0.7255984544754028, "rewards_train/1-w": 0.86854487657547, "rewards_train/2-2": 1.726739525794983, "rewards_train/2-w": 0.24647778272628784, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.5941433310508728, "rewards_train/margins_1": -0.4510863423347473, "rewards_train/margins_2": 1.480261743068695, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -160.5614776611328, "logps_train/policy_1_l": -184.19354248046875, "logps_train/policy_1_w": -148.28387451171875, "logps_train/policy_2_2": -123.51849365234375, "logps_train/policy_2_w": -191.58258056640625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.3157278299331665, "rewards_train/1-l": -2.0137887001037598, "rewards_train/1-w": 2.755401611328125, "rewards_train/2-2": 2.2051823139190674, "rewards_train/2-w": 1.3599060773849487, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.769190311431885, "rewards_train/margins_1": 1.4396737813949585, "rewards_train/margins_2": 0.8452762365341187, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -96.47421264648438, "logps_train/policy_1_l": -161.529052734375, "logps_train/policy_1_w": -113.63129425048828, "logps_train/policy_2_2": -76.90180969238281, "logps_train/policy_2_w": -139.27011108398438, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.5428130626678467, "rewards_train/1-l": -1.4982167482376099, "rewards_train/1-w": 1.4384331703186035, "rewards_train/2-2": 0.9723185300827026, "rewards_train/2-w": 0.36361414194107056, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.9366499185562134, "rewards_train/margins_1": 0.8956201076507568, "rewards_train/margins_2": 0.6087043881416321, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -189.1334686279297, "logps_train/policy_1_l": -199.88824462890625, "logps_train/policy_1_w": -171.28782653808594, "logps_train/policy_2_2": -151.95980834960938, "logps_train/policy_2_w": -198.75381469726562, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.5147775411605835, "rewards_train/1-l": -1.1325743198394775, "rewards_train/1-w": 1.7024683952331543, "rewards_train/2-2": 2.1743323802948, "rewards_train/2-w": 1.1871180534362793, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.835042715072632, "rewards_train/margins_1": 0.1876908540725708, "rewards_train/margins_2": 0.9872143268585205, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -92.79934692382812, "logps_train/policy_1_l": -116.54432678222656, "logps_train/policy_1_w": -119.35453796386719, "logps_train/policy_2_2": -74.66419219970703, "logps_train/policy_2_w": -146.45654296875, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.5661587119102478, "rewards_train/1-l": -1.2696666717529297, "rewards_train/1-w": 1.3555610179901123, "rewards_train/2-2": 0.9507683515548706, "rewards_train/2-w": 0.5504390001296997, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.625227689743042, "rewards_train/margins_1": 0.7894023060798645, "rewards_train/margins_2": 0.4003293514251709, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -82.98573303222656, "logps_train/policy_1_l": -98.0665512084961, "logps_train/policy_1_w": -87.19353485107422, "logps_train/policy_2_2": -70.29652404785156, "logps_train/policy_2_w": -111.00637817382812, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.6318957209587097, "rewards_train/1-l": -0.7675923109054565, "rewards_train/1-w": 1.296271562576294, "rewards_train/2-2": 0.913316011428833, "rewards_train/2-w": 0.6653785705566406, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.0638638734817505, "rewards_train/margins_1": 0.6643758416175842, "rewards_train/margins_2": 0.24793744087219238, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -132.00747680664062, "logps_train/policy_1_l": -67.348388671875, "logps_train/policy_1_w": -73.49375915527344, "logps_train/policy_2_2": -113.91645812988281, "logps_train/policy_2_w": -99.42022705078125, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -62.25, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 0.936362087726593, "rewards_train/1-l": -0.511401891708374, "rewards_train/1-w": 0.4584367275238037, "rewards_train/2-2": 1.2087444067001343, "rewards_train/2-w": 0.027508310973644257, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.9698386192321777, "rewards_train/margins_1": -0.4779253602027893, "rewards_train/margins_2": 1.18123609572649, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -245.4921417236328, "logps_train/policy_1_l": -213.5189208984375, "logps_train/policy_1_w": -153.1197967529297, "logps_train/policy_2_2": -182.4519805908203, "logps_train/policy_2_w": -207.04180908203125, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.8695356845855713, "rewards_train/1-l": -2.833141565322876, "rewards_train/1-w": 2.250520706176758, "rewards_train/2-2": 2.196988821029663, "rewards_train/2-w": 0.5801956057548523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.083662271499634, "rewards_train/margins_1": 1.3809850215911865, "rewards_train/margins_2": 1.6167932152748108, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -300.6703186035156, "logps_train/policy_1_l": -229.65164184570312, "logps_train/policy_1_w": -168.3903350830078, "logps_train/policy_2_2": -267.4000549316406, "logps_train/policy_2_w": -197.7589111328125, "logps_train/ref_1_2": -328.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -308.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 2.836094379425049, "rewards_train/1-l": -1.4928967952728271, "rewards_train/1-w": 3.1375293731689453, "rewards_train/2-2": 3.949838638305664, "rewards_train/2-w": 2.632701873779297, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6304261684417725, "rewards_train/margins_1": 0.3014349937438965, "rewards_train/margins_2": 1.3171367645263672, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -165.7674102783203, "logps_train/policy_1_l": -171.5540313720703, "logps_train/policy_1_w": -113.80496215820312, "logps_train/policy_2_2": -141.62991333007812, "logps_train/policy_2_w": -143.5169219970703, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 2.142399787902832, "rewards_train/1-l": -1.5157560110092163, "rewards_train/1-w": 1.5107141733169556, "rewards_train/2-2": 2.8354461193084717, "rewards_train/2-w": 0.9522136449813843, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.026470184326172, "rewards_train/margins_1": -0.6316856145858765, "rewards_train/margins_2": 1.8832324743270874, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -171.90139770507812, "logps_train/policy_1_l": -134.2021942138672, "logps_train/policy_1_w": -112.39827728271484, "logps_train/policy_2_2": -134.04849243164062, "logps_train/policy_2_w": -133.33370971679688, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.8926733732223511, "rewards_train/1-l": -1.1293991804122925, "rewards_train/1-w": 1.3234531879425049, "rewards_train/2-2": 2.2248380184173584, "rewards_train/2-w": 0.5049101114273071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4528523683547974, "rewards_train/margins_1": 0.4307798147201538, "rewards_train/margins_2": 1.7199279069900513, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -144.5245361328125, "logps_train/policy_1_l": -113.5479507446289, "logps_train/policy_1_w": -94.09567260742188, "logps_train/policy_2_2": -124.62359619140625, "logps_train/policy_2_w": -112.47483825683594, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.1053595542907715, "rewards_train/1-l": -1.3745219707489014, "rewards_train/1-w": 1.741994857788086, "rewards_train/2-2": 1.5595154762268066, "rewards_train/2-w": 1.2993905544281006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1165168285369873, "rewards_train/margins_1": 0.6366353034973145, "rewards_train/margins_2": 0.26012492179870605, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -167.37548828125, "logps_train/policy_1_l": -136.42816162109375, "logps_train/policy_1_w": -131.15147399902344, "logps_train/policy_2_2": -118.554931640625, "logps_train/policy_2_w": -169.316650390625, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0030765533447266, "rewards_train/1-l": -0.8178156614303589, "rewards_train/1-w": 2.0270400047302246, "rewards_train/2-2": 2.14138126373291, "rewards_train/2-w": 1.3105233907699585, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8448556661605835, "rewards_train/margins_1": 1.023963451385498, "rewards_train/margins_2": 0.8308578729629517, "step": 85 }, { "epoch": 0.26, "learning_rate": 4.917981255903893e-06, "loss": 0.8922, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -209.96316528320312, "logps_train/policy_1_l": -169.07260131835938, "logps_train/policy_1_w": -174.67080688476562, "logps_train/policy_2_2": -178.8079833984375, "logps_train/policy_2_w": -200.44985961914062, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.784933090209961, "rewards_train/1-l": -1.2773774862289429, "rewards_train/1-w": 2.0313560962677, "rewards_train/2-2": 2.42232608795166, "rewards_train/2-w": 1.4331386089324951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.308733582496643, "rewards_train/margins_1": 0.24642300605773926, "rewards_train/margins_2": 0.989187479019165, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -123.78358459472656, "logps_train/policy_1_l": -130.32864379882812, "logps_train/policy_1_w": -112.12083435058594, "logps_train/policy_2_2": -105.35269165039062, "logps_train/policy_2_w": -146.47634887695312, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0497665405273438, "rewards_train/1-l": -1.3043484687805176, "rewards_train/1-w": 1.9726824760437012, "rewards_train/2-2": 1.5880703926086426, "rewards_train/2-w": 0.9476773738861084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2770309448242188, "rewards_train/margins_1": 0.9229159355163574, "rewards_train/margins_2": 0.6403930187225342, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -83.2448959350586, "logps_train/policy_1_l": -113.92268371582031, "logps_train/policy_1_w": -88.51285552978516, "logps_train/policy_2_2": -63.43334197998047, "logps_train/policy_2_w": -116.61979675292969, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.7403544783592224, "rewards_train/1-l": -1.4125816822052002, "rewards_train/1-w": 1.1963706016540527, "rewards_train/2-2": 1.2371346950531006, "rewards_train/2-w": 0.38958239555358887, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.608952283859253, "rewards_train/margins_1": 0.4560161232948303, "rewards_train/margins_2": 0.8475522994995117, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -118.47959899902344, "logps_train/policy_1_l": -118.17969512939453, "logps_train/policy_1_w": -58.92656326293945, "logps_train/policy_2_2": -91.92311096191406, "logps_train/policy_2_w": -80.62126159667969, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.5657122135162354, "rewards_train/1-l": -1.0099613666534424, "rewards_train/1-w": 0.9712106585502625, "rewards_train/2-2": 1.2967517375946045, "rewards_train/2-w": 0.7652177214622498, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9811720252037048, "rewards_train/margins_1": 0.4054984450340271, "rewards_train/margins_2": 0.5315340161323547, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -108.67698669433594, "logps_train/policy_1_l": -126.69949340820312, "logps_train/policy_1_w": -124.98123168945312, "logps_train/policy_2_2": -86.0973892211914, "logps_train/policy_2_w": -159.01272583007812, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.5891370177268982, "rewards_train/1-l": -1.5775667428970337, "rewards_train/1-w": 1.77687668800354, "rewards_train/2-2": 1.2113547325134277, "rewards_train/2-w": 0.7260719537734985, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3544434309005737, "rewards_train/margins_1": 1.1877396702766418, "rewards_train/margins_2": 0.4852827787399292, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -80.62354278564453, "logps_train/policy_1_l": -81.35836029052734, "logps_train/policy_1_w": -109.98438262939453, "logps_train/policy_2_2": -65.05706024169922, "logps_train/policy_2_w": -135.3514404296875, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.5505361557006836, "rewards_train/1-l": -0.5088342428207397, "rewards_train/1-w": 1.3384760618209839, "rewards_train/2-2": 0.8817940354347229, "rewards_train/2-w": 0.9115357398986816, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.8473103046417236, "rewards_train/margins_1": 0.7879399061203003, "rewards_train/margins_2": -0.02974170446395874, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -264.8297119140625, "logps_train/policy_1_l": -266.79034423828125, "logps_train/policy_1_w": -183.13890075683594, "logps_train/policy_2_2": -224.13238525390625, "logps_train/policy_2_w": -217.20736694335938, "logps_train/ref_1_2": -284.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.9670283794403076, "rewards_train/1-l": -1.9134104251861572, "rewards_train/1-w": 2.5704851150512695, "rewards_train/2-2": 2.855510711669922, "rewards_train/2-w": 2.004263162612915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.483895540237427, "rewards_train/margins_1": 0.6034567356109619, "rewards_train/margins_2": 0.8512475490570068, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -187.73684692382812, "logps_train/policy_1_l": -130.9358673095703, "logps_train/policy_1_w": -121.09629821777344, "logps_train/policy_2_2": -145.42611694335938, "logps_train/policy_2_w": -165.94435119628906, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 0.220066636800766, "rewards_train/1-l": -1.0087238550186157, "rewards_train/1-w": 1.8075578212738037, "rewards_train/2-2": 1.1487946510314941, "rewards_train/2-w": 0.7024400234222412, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8162816762924194, "rewards_train/margins_1": 1.5874911844730377, "rewards_train/margins_2": 0.44635462760925293, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -139.09197998046875, "logps_train/policy_1_l": -149.21038818359375, "logps_train/policy_1_w": -132.61981201171875, "logps_train/policy_2_2": -105.83191680908203, "logps_train/policy_2_w": -158.80836486816406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.748420000076294, "rewards_train/1-l": -0.9498966932296753, "rewards_train/1-w": 2.8404123783111572, "rewards_train/2-2": 2.518761157989502, "rewards_train/2-w": 2.042064666748047, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7903090715408325, "rewards_train/margins_1": 1.0919923782348633, "rewards_train/margins_2": 0.4766964912414551, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -112.34964752197266, "logps_train/policy_1_l": -144.70681762695312, "logps_train/policy_1_w": -179.5135498046875, "logps_train/policy_2_2": -99.27616882324219, "logps_train/policy_2_w": -199.0514373779297, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.236519694328308, "rewards_train/1-l": -1.0519312620162964, "rewards_train/1-w": 2.4455204010009766, "rewards_train/2-2": 1.641913890838623, "rewards_train/2-w": 1.941731333732605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.497451663017273, "rewards_train/margins_1": 1.2090007066726685, "rewards_train/margins_2": -0.29981744289398193, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -148.61538696289062, "logps_train/policy_1_l": -154.712646484375, "logps_train/policy_1_w": -135.60772705078125, "logps_train/policy_2_2": -121.39579772949219, "logps_train/policy_2_w": -165.4123077392578, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5525249242782593, "rewards_train/1-l": -0.9618889689445496, "rewards_train/1-w": 2.2103219032287598, "rewards_train/2-2": 2.2604198455810547, "rewards_train/2-w": 1.2868942022323608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1722108721733093, "rewards_train/margins_1": 0.6577969789505005, "rewards_train/margins_2": 0.9735256433486938, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -214.46322631835938, "logps_train/policy_1_l": -153.76739501953125, "logps_train/policy_1_w": -146.4933624267578, "logps_train/policy_2_2": -178.15090942382812, "logps_train/policy_2_w": -187.73219299316406, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.544302225112915, "rewards_train/1-l": -1.0138486623764038, "rewards_train/1-w": 1.9256644248962402, "rewards_train/2-2": 2.291940212249756, "rewards_train/2-w": 1.0080311298370361, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.939513087272644, "rewards_train/margins_1": 0.3813621997833252, "rewards_train/margins_2": 1.2839090824127197, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -213.42762756347656, "logps_train/policy_1_l": -204.3439483642578, "logps_train/policy_1_w": -147.429443359375, "logps_train/policy_2_2": -172.45709228515625, "logps_train/policy_2_w": -189.41958618164062, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.2505967617034912, "rewards_train/1-l": -1.6258020401000977, "rewards_train/1-w": 2.092602252960205, "rewards_train/2-2": 2.169525623321533, "rewards_train/2-w": 1.1119482517242432, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7184042930603027, "rewards_train/margins_1": 0.8420054912567139, "rewards_train/margins_2": 1.05757737159729, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -240.07435607910156, "logps_train/policy_1_l": -230.472900390625, "logps_train/policy_1_w": -124.60264587402344, "logps_train/policy_2_2": -202.3482208251953, "logps_train/policy_2_w": -158.37841796875, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.5097510814666748, "rewards_train/1-l": -1.8213138580322266, "rewards_train/1-w": 2.4778215885162354, "rewards_train/2-2": 2.972992181777954, "rewards_train/2-w": 1.8250479698181152, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.299135446548462, "rewards_train/margins_1": 0.9680705070495605, "rewards_train/margins_2": 1.1479442119598389, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -189.6153564453125, "logps_train/policy_1_l": -139.13046264648438, "logps_train/policy_1_w": -124.33826446533203, "logps_train/policy_2_2": -161.23519897460938, "logps_train/policy_2_w": -150.07412719726562, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.1408076286315918, "rewards_train/1-l": -0.922029972076416, "rewards_train/1-w": 1.8021104335784912, "rewards_train/2-2": 2.1159322261810303, "rewards_train/2-w": 1.0457115173339844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7241404056549072, "rewards_train/margins_1": 0.6613028049468994, "rewards_train/margins_2": 1.070220708847046, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -212.37246704101562, "logps_train/policy_1_l": -156.53524780273438, "logps_train/policy_1_w": -112.80697631835938, "logps_train/policy_2_2": -168.5215301513672, "logps_train/policy_2_w": -139.09169006347656, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.4127528667449951, "rewards_train/1-l": -1.7441500425338745, "rewards_train/1-w": 1.4624664783477783, "rewards_train/2-2": 2.7775349617004395, "rewards_train/2-w": 1.0216901302337646, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.206616520881653, "rewards_train/margins_1": 0.0497136116027832, "rewards_train/margins_2": 1.7558448314666748, "step": 87 }, { "epoch": 0.26, "learning_rate": 4.9115889547708975e-06, "loss": 0.8918, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -187.82974243164062, "logps_train/policy_1_l": -134.28582763671875, "logps_train/policy_1_w": -120.75813293457031, "logps_train/policy_2_2": -149.33468627929688, "logps_train/policy_2_w": -148.021728515625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.9732764959335327, "rewards_train/1-l": -1.5098319053649902, "rewards_train/1-w": 2.255436897277832, "rewards_train/2-2": 1.954031229019165, "rewards_train/2-w": 1.4954822063446045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7652688026428223, "rewards_train/margins_1": 1.2821604013442993, "rewards_train/margins_2": 0.45854902267456055, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -134.10646057128906, "logps_train/policy_1_l": -116.884033203125, "logps_train/policy_1_w": -59.939579010009766, "logps_train/policy_2_2": -99.55938720703125, "logps_train/policy_2_w": -80.70175170898438, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 1.0588852167129517, "rewards_train/1-l": -1.5040276050567627, "rewards_train/1-w": 1.123229742050171, "rewards_train/2-2": 1.8799983263015747, "rewards_train/2-w": 0.8337308764457703, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6272573471069336, "rewards_train/margins_1": 0.06434452533721924, "rewards_train/margins_2": 1.0462674498558044, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -99.44096374511719, "logps_train/policy_1_l": -91.23806762695312, "logps_train/policy_1_w": -103.08536529541016, "logps_train/policy_2_2": -82.97306060791016, "logps_train/policy_2_w": -122.01458740234375, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.6848105788230896, "rewards_train/1-l": -1.099735140800476, "rewards_train/1-w": 1.5335042476654053, "rewards_train/2-2": 0.883162796497345, "rewards_train/2-w": 1.11729097366333, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.6332393884658813, "rewards_train/margins_1": 0.8486936688423157, "rewards_train/margins_2": -0.2341281771659851, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -234.35687255859375, "logps_train/policy_1_l": -187.41995239257812, "logps_train/policy_1_w": -194.50650024414062, "logps_train/policy_2_2": -190.636474609375, "logps_train/policy_2_w": -244.6182098388672, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.9768123626708984, "rewards_train/1-l": -1.5154328346252441, "rewards_train/1-w": 2.924348831176758, "rewards_train/2-2": 3.23635196685791, "rewards_train/2-w": 1.6631804704666138, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.439781665802002, "rewards_train/margins_1": 0.9475364685058594, "rewards_train/margins_2": 1.5731714963912964, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -215.296142578125, "logps_train/policy_1_l": -231.18603515625, "logps_train/policy_1_w": -152.0165557861328, "logps_train/policy_2_2": -179.72872924804688, "logps_train/policy_2_w": -196.15150451660156, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6953880786895752, "rewards_train/1-l": -1.335031509399414, "rewards_train/1-w": 2.17490816116333, "rewards_train/2-2": 2.3990023136138916, "rewards_train/2-w": 1.3598501682281494, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.509939670562744, "rewards_train/margins_1": 0.4795200824737549, "rewards_train/margins_2": 1.0391521453857422, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -121.64705657958984, "logps_train/policy_1_l": -74.08370971679688, "logps_train/policy_1_w": -84.06800842285156, "logps_train/policy_2_2": -93.74885559082031, "logps_train/policy_2_w": -112.21754455566406, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.5767006278038025, "rewards_train/1-l": -0.3705785870552063, "rewards_train/1-w": 1.3568711280822754, "rewards_train/2-2": 1.290544033050537, "rewards_train/2-w": 0.9141826629638672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7274497151374817, "rewards_train/margins_1": 0.7801705002784729, "rewards_train/margins_2": 0.3763613700866699, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -150.03225708007812, "logps_train/policy_1_l": -142.64892578125, "logps_train/policy_1_w": -96.49898529052734, "logps_train/policy_2_2": -126.80371856689453, "logps_train/policy_2_w": -121.47914123535156, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.7534143924713135, "rewards_train/1-l": -1.132763147354126, "rewards_train/1-w": 1.5481480360031128, "rewards_train/2-2": 2.4125962257385254, "rewards_train/2-w": 1.0364606380462646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6809111833572388, "rewards_train/margins_1": -0.20526635646820068, "rewards_train/margins_2": 1.3761355876922607, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -160.02931213378906, "logps_train/policy_1_l": -135.590087890625, "logps_train/policy_1_w": -239.3891143798828, "logps_train/policy_2_2": -137.58255004882812, "logps_train/policy_2_w": -275.2936096191406, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -266.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.587694525718689, "rewards_train/1-l": -0.3802977204322815, "rewards_train/1-w": 2.618901252746582, "rewards_train/2-2": 1.9722135066986084, "rewards_train/2-w": 1.4272805452346802, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9991989731788635, "rewards_train/margins_1": 1.031206727027893, "rewards_train/margins_2": 0.5449329614639282, "step": 88 }, { "epoch": 0.27, "logps_train/policy_1_2": -205.18283081054688, "logps_train/policy_1_l": -159.65887451171875, "logps_train/policy_1_w": -154.9559783935547, "logps_train/policy_2_2": -163.69859313964844, "logps_train/policy_2_w": -191.24356079101562, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.50046706199646, "rewards_train/1-l": -1.1254570484161377, "rewards_train/1-w": 2.3412187099456787, "rewards_train/2-2": 2.5762345790863037, "rewards_train/2-w": 1.639122486114502, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4666757583618164, "rewards_train/margins_1": 0.8407516479492188, "rewards_train/margins_2": 0.9371120929718018, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -171.8333740234375, "logps_train/policy_1_l": -153.7105712890625, "logps_train/policy_1_w": -167.0887451171875, "logps_train/policy_2_2": -136.21597290039062, "logps_train/policy_2_w": -202.65481567382812, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 0.524476170539856, "rewards_train/1-l": -1.2327746152877808, "rewards_train/1-w": 2.1825313568115234, "rewards_train/2-2": 1.2858238220214844, "rewards_train/2-w": 1.2563949823379517, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.415305972099304, "rewards_train/margins_1": 1.6580551862716675, "rewards_train/margins_2": 0.029428839683532715, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -115.638427734375, "logps_train/policy_1_l": -101.5625228881836, "logps_train/policy_1_w": -97.2837905883789, "logps_train/policy_2_2": -89.60005950927734, "logps_train/policy_2_w": -120.91358184814453, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.0170164108276367, "rewards_train/1-l": -0.9805203676223755, "rewards_train/1-w": 1.745449185371399, "rewards_train/2-2": 1.479154109954834, "rewards_train/2-w": 1.2949700355529785, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7259695529937744, "rewards_train/margins_1": 0.7284327745437622, "rewards_train/margins_2": 0.18418407440185547, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -192.98800659179688, "logps_train/policy_1_l": -148.74835205078125, "logps_train/policy_1_w": -110.32206726074219, "logps_train/policy_2_2": -175.06307983398438, "logps_train/policy_2_w": -128.703125, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.690260887145996, "rewards_train/1-l": -1.1998357772827148, "rewards_train/1-w": 1.5177932977676392, "rewards_train/2-2": 2.245255470275879, "rewards_train/2-w": 1.206250786781311, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.717629075050354, "rewards_train/margins_1": -0.17246758937835693, "rewards_train/margins_2": 1.0390046834945679, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -145.42514038085938, "logps_train/policy_1_l": -174.64642333984375, "logps_train/policy_1_w": -136.14932250976562, "logps_train/policy_2_2": -122.34355163574219, "logps_train/policy_2_w": -156.4196319580078, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.905923843383789, "rewards_train/1-l": -0.655266284942627, "rewards_train/1-w": 2.6116297245025635, "rewards_train/2-2": 2.139863967895508, "rewards_train/2-w": 2.108036994934082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2668960094451904, "rewards_train/margins_1": 0.7057058811187744, "rewards_train/margins_2": 0.03182697296142578, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -135.3849639892578, "logps_train/policy_1_l": -190.856689453125, "logps_train/policy_1_w": -144.08566284179688, "logps_train/policy_2_2": -105.95005798339844, "logps_train/policy_2_w": -175.60043334960938, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.1263477802276611, "rewards_train/1-l": -1.602855920791626, "rewards_train/1-w": 1.7664345502853394, "rewards_train/2-2": 1.7528455257415771, "rewards_train/2-w": 0.9993312358856201, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3692904710769653, "rewards_train/margins_1": 0.6400867700576782, "rewards_train/margins_2": 0.753514289855957, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -100.4898452758789, "logps_train/policy_1_l": -117.1898193359375, "logps_train/policy_1_w": -118.53932189941406, "logps_train/policy_2_2": -84.92549133300781, "logps_train/policy_2_w": -145.1370849609375, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.9478906393051147, "rewards_train/1-l": -1.1053589582443237, "rewards_train/1-w": 1.2077869176864624, "rewards_train/2-2": 1.1762011051177979, "rewards_train/2-w": 0.47457191348075867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.313145875930786, "rewards_train/margins_1": 0.25989627838134766, "rewards_train/margins_2": 0.7016291916370392, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -125.25228118896484, "logps_train/policy_1_l": -232.92831420898438, "logps_train/policy_1_w": -118.76512145996094, "logps_train/policy_2_2": -108.441162109375, "logps_train/policy_2_w": -143.09104919433594, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.6247715950012207, "rewards_train/1-l": -1.8615820407867432, "rewards_train/1-w": 1.9180195331573486, "rewards_train/2-2": 2.1574459075927734, "rewards_train/2-w": 1.4580832719802856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.779601573944092, "rewards_train/margins_1": 0.29324793815612793, "rewards_train/margins_2": 0.6993626356124878, "step": 89 }, { "epoch": 0.27, "learning_rate": 4.904961286807285e-06, "loss": 0.8442, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -149.93154907226562, "logps_train/policy_1_l": -135.95526123046875, "logps_train/policy_1_w": -106.7122802734375, "logps_train/policy_2_2": -133.30133056640625, "logps_train/policy_2_w": -123.94782257080078, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.0420012474060059, "rewards_train/1-l": -0.9174011945724487, "rewards_train/1-w": 1.7443968057632446, "rewards_train/2-2": 1.2479915618896484, "rewards_train/2-w": 1.3036552667617798, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.6617980003356934, "rewards_train/margins_1": 0.7023955583572388, "rewards_train/margins_2": -0.05566370487213135, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -204.834228515625, "logps_train/policy_1_l": -212.8990020751953, "logps_train/policy_1_w": -170.79869079589844, "logps_train/policy_2_2": -176.36532592773438, "logps_train/policy_2_w": -200.7911376953125, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.0993897914886475, "rewards_train/1-l": -1.4428297281265259, "rewards_train/1-w": 2.7107558250427246, "rewards_train/2-2": 1.6415928602218628, "rewards_train/2-w": 1.761509895324707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.1535855531692505, "rewards_train/margins_1": 1.6113660335540771, "rewards_train/margins_2": -0.11991703510284424, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -175.50888061523438, "logps_train/policy_1_l": -209.1562042236328, "logps_train/policy_1_w": -204.38906860351562, "logps_train/policy_2_2": -143.5635223388672, "logps_train/policy_2_w": -243.23046875, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.3905177116394043, "rewards_train/1-l": -1.6368606090545654, "rewards_train/1-w": 2.845468044281006, "rewards_train/2-2": 2.1471638679504395, "rewards_train/2-w": 1.5144531726837158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.482328653335571, "rewards_train/margins_1": 1.4549503326416016, "rewards_train/margins_2": 0.6327106952667236, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -106.30097961425781, "logps_train/policy_1_l": -88.96659851074219, "logps_train/policy_1_w": -118.468017578125, "logps_train/policy_2_2": -83.88599395751953, "logps_train/policy_2_w": -142.4754638671875, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.5462692975997925, "rewards_train/1-l": -0.8187796473503113, "rewards_train/1-w": 2.0092523097991943, "rewards_train/2-2": 1.036888837814331, "rewards_train/2-w": 1.3037235736846924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8280319571495056, "rewards_train/margins_1": 1.4629830121994019, "rewards_train/margins_2": -0.26683473587036133, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -195.5130615234375, "logps_train/policy_1_l": -90.03474426269531, "logps_train/policy_1_w": -157.93557739257812, "logps_train/policy_2_2": -164.008056640625, "logps_train/policy_2_w": -184.17562866210938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.2127559185028076, "rewards_train/1-l": -0.9640215635299683, "rewards_train/1-w": 1.9283168315887451, "rewards_train/2-2": 2.001342535018921, "rewards_train/2-w": 1.2386869192123413, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8923383951187134, "rewards_train/margins_1": 0.7155609130859375, "rewards_train/margins_2": 0.7626556158065796, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -159.73646545410156, "logps_train/policy_1_l": -161.9151611328125, "logps_train/policy_1_w": -128.08819580078125, "logps_train/policy_2_2": -132.49472045898438, "logps_train/policy_2_w": -155.45428466796875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.122446894645691, "rewards_train/1-l": -1.600891351699829, "rewards_train/1-w": 2.0138370990753174, "rewards_train/2-2": 1.869887351989746, "rewards_train/2-w": 1.0045711994171143, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6147284507751465, "rewards_train/margins_1": 0.8913902044296265, "rewards_train/margins_2": 0.8653161525726318, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -170.06918334960938, "logps_train/policy_1_l": -152.50881958007812, "logps_train/policy_1_w": -189.79022216796875, "logps_train/policy_2_2": -138.2399139404297, "logps_train/policy_2_w": -231.296142578125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.2024576663970947, "rewards_train/1-l": -0.6730502843856812, "rewards_train/1-w": 3.024101972579956, "rewards_train/2-2": 2.6908531188964844, "rewards_train/2-w": 1.4789788722991943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.697152256965637, "rewards_train/margins_1": 0.8216443061828613, "rewards_train/margins_2": 1.21187424659729, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -80.75157165527344, "logps_train/policy_1_l": -73.59841918945312, "logps_train/policy_1_w": -100.91319274902344, "logps_train/policy_2_2": -71.44275665283203, "logps_train/policy_2_w": -118.20014190673828, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.31878790259361267, "rewards_train/1-l": -0.8166779279708862, "rewards_train/1-w": 1.7871954441070557, "rewards_train/2-2": 0.3970876932144165, "rewards_train/2-w": 1.5581105947494507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.603873372077942, "rewards_train/margins_1": 1.468407541513443, "rewards_train/margins_2": -1.1610229015350342, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -100.18289184570312, "logps_train/policy_1_l": -107.82118225097656, "logps_train/policy_1_w": -64.1551742553711, "logps_train/policy_2_2": -77.30766296386719, "logps_train/policy_2_w": -90.93778991699219, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 1.1707727909088135, "rewards_train/1-l": -1.2787851095199585, "rewards_train/1-w": 1.3860450983047485, "rewards_train/2-2": 1.7176713943481445, "rewards_train/2-w": 0.6468464136123657, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.664830207824707, "rewards_train/margins_1": 0.21527230739593506, "rewards_train/margins_2": 1.0708249807357788, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -143.92825317382812, "logps_train/policy_1_l": -119.58665466308594, "logps_train/policy_1_w": -90.77346801757812, "logps_train/policy_2_2": -123.92244720458984, "logps_train/policy_2_w": -111.05645751953125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.9860810041427612, "rewards_train/1-l": -0.8188223838806152, "rewards_train/1-w": 1.3523406982421875, "rewards_train/2-2": 2.362442970275879, "rewards_train/2-w": 0.8818542957305908, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1711630821228027, "rewards_train/margins_1": -0.6337403059005737, "rewards_train/margins_2": 1.480588674545288, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -224.33523559570312, "logps_train/policy_1_l": -202.825927734375, "logps_train/policy_1_w": -141.80743408203125, "logps_train/policy_2_2": -181.7060546875, "logps_train/policy_2_w": -177.72067260742188, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.3508515357971191, "rewards_train/1-l": -1.0616943836212158, "rewards_train/1-w": 2.381755828857422, "rewards_train/2-2": 2.5731449127197266, "rewards_train/2-w": 1.3216819763183594, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4434502124786377, "rewards_train/margins_1": 1.0309042930603027, "rewards_train/margins_2": 1.2514629364013672, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -126.71607971191406, "logps_train/policy_1_l": -176.11135864257812, "logps_train/policy_1_w": -113.36521911621094, "logps_train/policy_2_2": -98.21488189697266, "logps_train/policy_2_w": -149.76336669921875, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.7885482311248779, "rewards_train/1-l": -1.4920690059661865, "rewards_train/1-w": 2.0318379402160645, "rewards_train/2-2": 1.3300745487213135, "rewards_train/2-w": 1.203741431236267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.523906946182251, "rewards_train/margins_1": 1.2432897090911865, "rewards_train/margins_2": 0.1263331174850464, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -151.0743865966797, "logps_train/policy_1_l": -121.07349395751953, "logps_train/policy_1_w": -121.38605499267578, "logps_train/policy_2_2": -128.8281707763672, "logps_train/policy_2_w": -144.32095336914062, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.7988111972808838, "rewards_train/1-l": -1.0007089376449585, "rewards_train/1-w": 1.869988203048706, "rewards_train/2-2": 2.015620708465576, "rewards_train/2-w": 1.0288424491882324, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8706971406936646, "rewards_train/margins_1": 0.07117700576782227, "rewards_train/margins_2": 0.9867782592773438, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -135.7967987060547, "logps_train/policy_1_l": -110.31665802001953, "logps_train/policy_1_w": -99.39505004882812, "logps_train/policy_2_2": -120.68350219726562, "logps_train/policy_2_w": -114.29663848876953, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.2521555423736572, "rewards_train/1-l": -0.37307190895080566, "rewards_train/1-w": 1.7284642457962036, "rewards_train/2-2": 1.5097750425338745, "rewards_train/2-w": 1.4172112941741943, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1015361547470093, "rewards_train/margins_1": 0.4763087034225464, "rewards_train/margins_2": 0.09256374835968018, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -137.97146606445312, "logps_train/policy_1_l": -120.7172622680664, "logps_train/policy_1_w": -111.954345703125, "logps_train/policy_2_2": -108.78863525390625, "logps_train/policy_2_w": -140.1451416015625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.8036349415779114, "rewards_train/1-l": -1.396531105041504, "rewards_train/1-w": 1.2947025299072266, "rewards_train/2-2": 1.3642524480819702, "rewards_train/2-w": 0.9620479345321655, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6912336349487305, "rewards_train/margins_1": 0.4910675883293152, "rewards_train/margins_2": 0.4022045135498047, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -171.57208251953125, "logps_train/policy_1_l": -218.13009643554688, "logps_train/policy_1_w": -152.97193908691406, "logps_train/policy_2_2": -134.29824829101562, "logps_train/policy_2_w": -216.13485717773438, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.4521676301956177, "rewards_train/1-l": -2.698653221130371, "rewards_train/1-w": 3.017064094543457, "rewards_train/2-2": 2.1436123847961426, "rewards_train/2-w": 1.5298742055892944, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.715717315673828, "rewards_train/margins_1": 1.5648964643478394, "rewards_train/margins_2": 0.6137381792068481, "step": 91 }, { "epoch": 0.28, "learning_rate": 4.898098898861766e-06, "loss": 0.8806, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -93.16020202636719, "logps_train/policy_1_l": -89.53599548339844, "logps_train/policy_1_w": -84.05793762207031, "logps_train/policy_2_2": -73.06253051757812, "logps_train/policy_2_w": -104.37403106689453, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.5956985950469971, "rewards_train/1-l": -0.46609920263290405, "rewards_train/1-w": 1.0641281604766846, "rewards_train/2-2": 0.9958951473236084, "rewards_train/2-w": 0.4743158221244812, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.5302273631095886, "rewards_train/margins_1": 0.4684295654296875, "rewards_train/margins_2": 0.5215793251991272, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -190.2870635986328, "logps_train/policy_1_l": -225.54090881347656, "logps_train/policy_1_w": -214.58477783203125, "logps_train/policy_2_2": -152.041015625, "logps_train/policy_2_w": -264.157470703125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 1.0917036533355713, "rewards_train/1-l": -1.4745259284973145, "rewards_train/1-w": 3.297870635986328, "rewards_train/2-2": 1.8414061069488525, "rewards_train/2-w": 1.9317138195037842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.772396564483643, "rewards_train/margins_1": 2.206166982650757, "rewards_train/margins_2": -0.09030771255493164, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -173.65774536132812, "logps_train/policy_1_l": -124.80438232421875, "logps_train/policy_1_w": -151.2062225341797, "logps_train/policy_2_2": -141.8964385986328, "logps_train/policy_2_w": -196.07241821289062, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.924069344997406, "rewards_train/1-l": -1.591766119003296, "rewards_train/1-w": 2.1184401512145996, "rewards_train/2-2": 1.6626994609832764, "rewards_train/2-w": 1.189634084701538, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7102062702178955, "rewards_train/margins_1": 1.1943708062171936, "rewards_train/margins_2": 0.4730653762817383, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -220.34959411621094, "logps_train/policy_1_l": -233.6040802001953, "logps_train/policy_1_w": -217.69415283203125, "logps_train/policy_2_2": -184.31968688964844, "logps_train/policy_2_w": -268.7702331542969, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -246.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 1.4056651592254639, "rewards_train/1-l": -2.217047929763794, "rewards_train/1-w": 2.8399596214294434, "rewards_train/2-2": 2.4313130378723145, "rewards_train/2-w": 1.367508053779602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.057007551193237, "rewards_train/margins_1": 1.4342944622039795, "rewards_train/margins_2": 1.0638049840927124, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -236.8878173828125, "logps_train/policy_1_l": -134.33740234375, "logps_train/policy_1_w": -128.26730346679688, "logps_train/policy_2_2": -188.798583984375, "logps_train/policy_2_w": -178.66026306152344, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.8830930590629578, "rewards_train/1-l": -0.7028806805610657, "rewards_train/1-w": 2.442018985748291, "rewards_train/2-2": 2.442016124725342, "rewards_train/2-w": 1.2995986938476562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1448996663093567, "rewards_train/margins_1": 1.5589259266853333, "rewards_train/margins_2": 1.1424174308776855, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -146.9445037841797, "logps_train/policy_1_l": -132.96278381347656, "logps_train/policy_1_w": -113.3408203125, "logps_train/policy_2_2": -122.27303314208984, "logps_train/policy_2_w": -141.26898193359375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.7024243474006653, "rewards_train/1-l": -0.8576058745384216, "rewards_train/1-w": 0.949511706829071, "rewards_train/2-2": 1.2351971864700317, "rewards_train/2-w": 0.40591344237327576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8071175813674927, "rewards_train/margins_1": 0.24708735942840576, "rewards_train/margins_2": 0.829283744096756, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -281.30419921875, "logps_train/policy_1_l": -285.739990234375, "logps_train/policy_1_w": -177.89337158203125, "logps_train/policy_2_2": -225.04185485839844, "logps_train/policy_2_w": -221.11033630371094, "logps_train/ref_1_2": -292.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 0.8883293271064758, "rewards_train/1-l": -2.5302486419677734, "rewards_train/1-w": 2.7669124603271484, "rewards_train/2-2": 2.508314847946167, "rewards_train/2-w": 1.9514667987823486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.297161102294922, "rewards_train/margins_1": 1.8785831332206726, "rewards_train/margins_2": 0.5568480491638184, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -194.57598876953125, "logps_train/policy_1_l": -114.81407165527344, "logps_train/policy_1_w": -122.37065124511719, "logps_train/policy_2_2": -169.83912658691406, "logps_train/policy_2_w": -147.6658935546875, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.4986507892608643, "rewards_train/1-l": -1.139609932899475, "rewards_train/1-w": 1.9613728523254395, "rewards_train/2-2": 2.028587579727173, "rewards_train/2-w": 1.3802841901779175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1009827852249146, "rewards_train/margins_1": 0.4627220630645752, "rewards_train/margins_2": 0.6483033895492554, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -175.04248046875, "logps_train/policy_1_l": -136.54974365234375, "logps_train/policy_1_w": -158.83071899414062, "logps_train/policy_2_2": -144.76156616210938, "logps_train/policy_2_w": -196.96310424804688, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.102002739906311, "rewards_train/1-l": -1.0823173522949219, "rewards_train/1-w": 2.3302102088928223, "rewards_train/2-2": 2.3629066944122314, "rewards_train/2-w": 1.1872832775115967, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.412527561187744, "rewards_train/margins_1": 1.2282074689865112, "rewards_train/margins_2": 1.1756234169006348, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -125.0038070678711, "logps_train/policy_1_l": -112.30256652832031, "logps_train/policy_1_w": -114.79829406738281, "logps_train/policy_2_2": -98.84832000732422, "logps_train/policy_2_w": -144.33685302734375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.5683693885803223, "rewards_train/1-l": -0.6869466304779053, "rewards_train/1-w": 2.4443893432617188, "rewards_train/2-2": 1.6714181900024414, "rewards_train/2-w": 1.694439172744751, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.131335973739624, "rewards_train/margins_1": 0.8760199546813965, "rewards_train/margins_2": -0.02302098274230957, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -254.60940551757812, "logps_train/policy_1_l": -203.67294311523438, "logps_train/policy_1_w": -193.5188751220703, "logps_train/policy_2_2": -193.28280639648438, "logps_train/policy_2_w": -231.18394470214844, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 0.8703091740608215, "rewards_train/1-l": -2.1629977226257324, "rewards_train/1-w": 2.692643404006958, "rewards_train/2-2": 2.5842199325561523, "rewards_train/2-w": 1.9394192695617676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.85564112663269, "rewards_train/margins_1": 1.8223342299461365, "rewards_train/margins_2": 0.6448006629943848, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -216.40261840820312, "logps_train/policy_1_l": -193.09854125976562, "logps_train/policy_1_w": -166.29656982421875, "logps_train/policy_2_2": -173.55616760253906, "logps_train/policy_2_w": -210.97775268554688, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.8948945999145508, "rewards_train/1-l": -1.8438148498535156, "rewards_train/1-w": 2.31917142868042, "rewards_train/2-2": 2.4064929485321045, "rewards_train/2-w": 1.409646987915039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.1629862785339355, "rewards_train/margins_1": 1.4242768287658691, "rewards_train/margins_2": 0.9968459606170654, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -158.72958374023438, "logps_train/policy_1_l": -146.32470703125, "logps_train/policy_1_w": -123.86235046386719, "logps_train/policy_2_2": -130.17312622070312, "logps_train/policy_2_w": -152.3601531982422, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.8426652550697327, "rewards_train/1-l": -0.9195683002471924, "rewards_train/1-w": 1.8055737018585205, "rewards_train/2-2": 1.7092503309249878, "rewards_train/2-w": 1.1796098947525024, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.725142002105713, "rewards_train/margins_1": 0.9629084467887878, "rewards_train/margins_2": 0.5296404361724854, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -263.3033447265625, "logps_train/policy_1_l": -274.34295654296875, "logps_train/policy_1_w": -150.20346069335938, "logps_train/policy_2_2": -221.20989990234375, "logps_train/policy_2_w": -196.56008911132812, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -248.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.5477904081344604, "rewards_train/1-l": -2.0046069622039795, "rewards_train/1-w": 2.415591239929199, "rewards_train/2-2": 2.660261631011963, "rewards_train/2-w": 1.6455538272857666, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.420198202133179, "rewards_train/margins_1": 0.8678008317947388, "rewards_train/margins_2": 1.0147078037261963, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -206.18319702148438, "logps_train/policy_1_l": -157.494384765625, "logps_train/policy_1_w": -123.56101989746094, "logps_train/policy_2_2": -175.66114807128906, "logps_train/policy_2_w": -135.95849609375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.800430953502655, "rewards_train/1-l": -1.381079077720642, "rewards_train/1-w": 1.6454607248306274, "rewards_train/2-2": 1.7432596683502197, "rewards_train/2-w": 1.2510254383087158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0265398025512695, "rewards_train/margins_1": 0.8450297713279724, "rewards_train/margins_2": 0.4922342300415039, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -193.44058227539062, "logps_train/policy_1_l": -143.46945190429688, "logps_train/policy_1_w": -126.46802520751953, "logps_train/policy_2_2": -151.48789978027344, "logps_train/policy_2_w": -162.0679473876953, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.0243006944656372, "rewards_train/1-l": -1.2035869359970093, "rewards_train/1-w": 2.2000722885131836, "rewards_train/2-2": 2.081678867340088, "rewards_train/2-w": 1.5025808811187744, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.403659224510193, "rewards_train/margins_1": 1.1757715940475464, "rewards_train/margins_2": 0.5790979862213135, "step": 93 }, { "epoch": 0.28, "learning_rate": 4.891002460691306e-06, "loss": 0.7407, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -142.87547302246094, "logps_train/policy_1_l": -145.88302612304688, "logps_train/policy_1_w": -110.07032775878906, "logps_train/policy_2_2": -118.77446746826172, "logps_train/policy_2_w": -135.5997314453125, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.4304218292236328, "rewards_train/1-l": -1.2926480770111084, "rewards_train/1-w": 1.2394517660140991, "rewards_train/2-2": 1.87411630153656, "rewards_train/2-w": 1.111120343208313, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5320998430252075, "rewards_train/margins_1": -0.1909700632095337, "rewards_train/margins_2": 0.7629959583282471, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -179.10935974121094, "logps_train/policy_1_l": -193.41903686523438, "logps_train/policy_1_w": -173.98675537109375, "logps_train/policy_2_2": -157.90069580078125, "logps_train/policy_2_w": -207.8780975341797, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 1.5578140020370483, "rewards_train/1-l": -1.841902256011963, "rewards_train/1-w": 2.4716382026672363, "rewards_train/2-2": 2.066962957382202, "rewards_train/2-w": 1.5215651988983154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.313540458679199, "rewards_train/margins_1": 0.913824200630188, "rewards_train/margins_2": 0.5453977584838867, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -302.5760498046875, "logps_train/policy_1_l": -215.522216796875, "logps_train/policy_1_w": -178.42041015625, "logps_train/policy_2_2": -217.71380615234375, "logps_train/policy_2_w": -247.90554809570312, "logps_train/ref_1_2": -302.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -241.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": -0.023227959871292114, "rewards_train/1-l": -1.2678461074829102, "rewards_train/1-w": 3.0017096996307373, "rewards_train/2-2": 2.3379948139190674, "rewards_train/2-w": 1.5906953811645508, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2695558071136475, "rewards_train/margins_1": 3.0249376595020294, "rewards_train/margins_2": 0.7472994327545166, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -161.56637573242188, "logps_train/policy_1_l": -195.97198486328125, "logps_train/policy_1_w": -131.38751220703125, "logps_train/policy_2_2": -132.02479553222656, "logps_train/policy_2_w": -160.54666137695312, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.4386752843856812, "rewards_train/1-l": -1.641387701034546, "rewards_train/1-w": 1.7714046239852905, "rewards_train/2-2": 2.2006454467773438, "rewards_train/2-w": 1.4125216007232666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4127923250198364, "rewards_train/margins_1": 0.3327293395996094, "rewards_train/margins_2": 0.7881238460540771, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -234.017578125, "logps_train/policy_1_l": -172.76918029785156, "logps_train/policy_1_w": -115.72430419921875, "logps_train/policy_2_2": -199.04568481445312, "logps_train/policy_2_w": -140.30038452148438, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7849619388580322, "rewards_train/1-l": -1.3117796182632446, "rewards_train/1-w": 1.696710228919983, "rewards_train/2-2": 1.7442607879638672, "rewards_train/2-w": 1.2285544872283936, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0084898471832275, "rewards_train/margins_1": 0.9117482900619507, "rewards_train/margins_2": 0.5157063007354736, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -195.70616149902344, "logps_train/policy_1_l": -211.25411987304688, "logps_train/policy_1_w": -161.00645446777344, "logps_train/policy_2_2": -161.12677001953125, "logps_train/policy_2_w": -197.77706909179688, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 2.0043840408325195, "rewards_train/1-l": -2.149630308151245, "rewards_train/1-w": 2.3743538856506348, "rewards_train/2-2": 2.9037280082702637, "rewards_train/2-w": 1.5129168033599854, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.52398419380188, "rewards_train/margins_1": 0.36996984481811523, "rewards_train/margins_2": 1.3908112049102783, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -161.5543670654297, "logps_train/policy_1_l": -141.78115844726562, "logps_train/policy_1_w": -108.56053161621094, "logps_train/policy_2_2": -133.1883544921875, "logps_train/policy_2_w": -141.3704376220703, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.403937816619873, "rewards_train/1-l": -0.8453035354614258, "rewards_train/1-w": 2.1861348152160645, "rewards_train/2-2": 1.945227861404419, "rewards_train/2-w": 1.270768404006958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0314383506774902, "rewards_train/margins_1": 0.7821969985961914, "rewards_train/margins_2": 0.6744594573974609, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -142.24850463867188, "logps_train/policy_1_l": -106.93626403808594, "logps_train/policy_1_w": -137.37074279785156, "logps_train/policy_2_2": -115.88562774658203, "logps_train/policy_2_w": -171.1566162109375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.5376508235931396, "rewards_train/1-l": -0.7424547672271729, "rewards_train/1-w": 1.907456398010254, "rewards_train/2-2": 2.040343761444092, "rewards_train/2-w": 0.8698843121528625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6499111652374268, "rewards_train/margins_1": 0.36980557441711426, "rewards_train/margins_2": 1.1704594492912292, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -79.72630310058594, "logps_train/policy_1_l": -44.9541015625, "logps_train/policy_1_w": -51.274234771728516, "logps_train/policy_2_2": -61.37577438354492, "logps_train/policy_2_w": -68.66270446777344, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -40.75, "logps_train/ref_1_w": -62.25, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -78.0, "rewards_train/1-2": 0.625807523727417, "rewards_train/1-l": -0.4276367127895355, "rewards_train/1-w": 1.0913265943527222, "rewards_train/2-2": 1.2491410970687866, "rewards_train/2-w": 0.9501358866691589, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.5189633071422577, "rewards_train/margins_1": 0.4655190706253052, "rewards_train/margins_2": 0.2990052103996277, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -191.59210205078125, "logps_train/policy_1_l": -176.31613159179688, "logps_train/policy_1_w": -89.99462890625, "logps_train/policy_2_2": -160.28317260742188, "logps_train/policy_2_w": -124.5550765991211, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9704764485359192, "rewards_train/1-l": -2.2456753253936768, "rewards_train/1-w": 2.2184085845947266, "rewards_train/2-2": 1.5376980304718018, "rewards_train/2-w": 1.376523733139038, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.464083909988403, "rewards_train/margins_1": 1.2479321360588074, "rewards_train/margins_2": 0.16117429733276367, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -186.6165008544922, "logps_train/policy_1_l": -165.2125244140625, "logps_train/policy_1_w": -154.04766845703125, "logps_train/policy_2_2": -155.46456909179688, "logps_train/policy_2_w": -194.86526489257812, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.503975510597229, "rewards_train/1-l": -1.453406572341919, "rewards_train/1-w": 2.8366403579711914, "rewards_train/2-2": 2.3097920417785645, "rewards_train/2-w": 1.736911654472351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.29004693031311, "rewards_train/margins_1": 1.3326648473739624, "rewards_train/margins_2": 0.5728803873062134, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -121.79918670654297, "logps_train/policy_1_l": -135.95993041992188, "logps_train/policy_1_w": -145.01126098632812, "logps_train/policy_2_2": -104.34964752197266, "logps_train/policy_2_w": -170.1318359375, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 0.970862090587616, "rewards_train/1-l": -1.9784146547317505, "rewards_train/1-w": 1.7613742351531982, "rewards_train/2-2": 1.5040971040725708, "rewards_train/2-w": 1.3516597747802734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.7397888898849487, "rewards_train/margins_1": 0.7905121445655823, "rewards_train/margins_2": 0.15243732929229736, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -93.1902084350586, "logps_train/policy_1_l": -85.49501037597656, "logps_train/policy_1_w": -78.59732055664062, "logps_train/policy_2_2": -79.97966003417969, "logps_train/policy_2_w": -97.808349609375, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 1.3606667518615723, "rewards_train/1-l": -0.6389545202255249, "rewards_train/1-w": 1.3722999095916748, "rewards_train/2-2": 1.5573070049285889, "rewards_train/2-w": 0.8597896695137024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.0112544298171997, "rewards_train/margins_1": 0.011633157730102539, "rewards_train/margins_2": 0.6975173354148865, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -214.77798461914062, "logps_train/policy_1_l": -235.8853759765625, "logps_train/policy_1_w": -148.45542907714844, "logps_train/policy_2_2": -182.86257934570312, "logps_train/policy_2_w": -181.92819213867188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.68782639503479, "rewards_train/1-l": -2.8983025550842285, "rewards_train/1-w": 1.9903943538665771, "rewards_train/2-2": 2.690304756164551, "rewards_train/2-w": 1.4993681907653809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.888696908950806, "rewards_train/margins_1": 0.3025679588317871, "rewards_train/margins_2": 1.19093656539917, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -128.76010131835938, "logps_train/policy_1_l": -111.2584228515625, "logps_train/policy_1_w": -121.10089874267578, "logps_train/policy_2_2": -100.73234558105469, "logps_train/policy_2_w": -169.3902130126953, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.2927393913269043, "rewards_train/1-l": -0.49049049615859985, "rewards_train/1-w": 1.9805346727371216, "rewards_train/2-2": 1.6423900127410889, "rewards_train/2-w": 0.685978889465332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4710251688957214, "rewards_train/margins_1": 0.6877952814102173, "rewards_train/margins_2": 0.9564111232757568, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -142.06527709960938, "logps_train/policy_1_l": -157.41758728027344, "logps_train/policy_1_w": -117.55811309814453, "logps_train/policy_2_2": -113.88182067871094, "logps_train/policy_2_w": -150.54898071289062, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.8215957880020142, "rewards_train/1-l": -1.8931264877319336, "rewards_train/1-w": 1.5012201070785522, "rewards_train/2-2": 1.650879979133606, "rewards_train/2-w": 0.904477059841156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.394346594810486, "rewards_train/margins_1": 0.6796243190765381, "rewards_train/margins_2": 0.74640291929245, "step": 95 }, { "epoch": 0.29, "learning_rate": 4.883672664895761e-06, "loss": 0.7967, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -177.93377685546875, "logps_train/policy_1_l": -137.44187927246094, "logps_train/policy_1_w": -131.982421875, "logps_train/policy_2_2": -132.38406372070312, "logps_train/policy_2_w": -168.3369140625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.9745919704437256, "rewards_train/1-l": -1.0176254510879517, "rewards_train/1-w": 1.4955066442489624, "rewards_train/2-2": 1.9022190570831299, "rewards_train/2-w": 1.0694336891174316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.513132095336914, "rewards_train/margins_1": 0.5209146738052368, "rewards_train/margins_2": 0.8327853679656982, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -162.33535766601562, "logps_train/policy_1_l": -119.01045227050781, "logps_train/policy_1_w": -120.4648666381836, "logps_train/policy_2_2": -134.34873962402344, "logps_train/policy_2_w": -156.40969848632812, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.213340401649475, "rewards_train/1-l": -1.0462596416473389, "rewards_train/1-w": 1.9356417655944824, "rewards_train/2-2": 1.9854393005371094, "rewards_train/2-w": 0.7673060297966003, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9819014072418213, "rewards_train/margins_1": 0.7223013639450073, "rewards_train/margins_2": 1.218133270740509, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -74.89035034179688, "logps_train/policy_1_l": -99.30641174316406, "logps_train/policy_1_w": -104.73153686523438, "logps_train/policy_2_2": -60.82063293457031, "logps_train/policy_2_w": -125.31843566894531, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -90.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.6402618288993835, "rewards_train/1-l": -0.8790782690048218, "rewards_train/1-w": 1.6049716472625732, "rewards_train/2-2": 0.9257492423057556, "rewards_train/2-w": 0.9994059801101685, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.484049916267395, "rewards_train/margins_1": 0.9647098183631897, "rewards_train/margins_2": -0.07365673780441284, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -226.74917602539062, "logps_train/policy_1_l": -224.98773193359375, "logps_train/policy_1_w": -166.768798828125, "logps_train/policy_2_2": -189.32078552246094, "logps_train/policy_2_w": -210.61480712890625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.324301838874817, "rewards_train/1-l": -2.27943754196167, "rewards_train/1-w": 2.3153066635131836, "rewards_train/2-2": 2.1604995727539062, "rewards_train/2-w": 1.0963318347930908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5947442054748535, "rewards_train/margins_1": 0.9910048246383667, "rewards_train/margins_2": 1.0641677379608154, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -148.63250732421875, "logps_train/policy_1_l": -132.0872802734375, "logps_train/policy_1_w": -150.21185302734375, "logps_train/policy_2_2": -120.88054656982422, "logps_train/policy_2_w": -176.30535888671875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.5758107900619507, "rewards_train/1-l": -0.6919301748275757, "rewards_train/1-w": 2.0381884574890137, "rewards_train/2-2": 2.1420235633850098, "rewards_train/2-w": 1.2206364870071411, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7301186323165894, "rewards_train/margins_1": 0.462377667427063, "rewards_train/margins_2": 0.9213870763778687, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -138.7915802001953, "logps_train/policy_1_l": -126.16995239257812, "logps_train/policy_1_w": -144.06080627441406, "logps_train/policy_2_2": -116.5379409790039, "logps_train/policy_2_w": -180.9310302734375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.7192792892456055, "rewards_train/1-l": -1.030666470527649, "rewards_train/1-w": 2.4814186096191406, "rewards_train/2-2": 1.100893497467041, "rewards_train/2-w": 1.6412711143493652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.5120850801467896, "rewards_train/margins_1": 1.7621393203735352, "rewards_train/margins_2": -0.5403776168823242, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -141.66970825195312, "logps_train/policy_1_l": -251.84832763671875, "logps_train/policy_1_w": -112.71148681640625, "logps_train/policy_2_2": -103.20062255859375, "logps_train/policy_2_w": -140.84083557128906, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.0674035549163818, "rewards_train/1-l": -4.023505687713623, "rewards_train/1-w": 1.9479914903640747, "rewards_train/2-2": 2.054938793182373, "rewards_train/2-w": 1.4577138423919678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.971497178077698, "rewards_train/margins_1": 0.8805879354476929, "rewards_train/margins_2": 0.5972249507904053, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -201.61886596679688, "logps_train/policy_1_l": -209.71548461914062, "logps_train/policy_1_w": -162.49838256835938, "logps_train/policy_2_2": -161.97647094726562, "logps_train/policy_2_w": -217.00340270996094, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.7381136417388916, "rewards_train/1-l": -1.9340486526489258, "rewards_train/1-w": 2.0642240047454834, "rewards_train/2-2": 2.727353096008301, "rewards_train/2-w": 0.707471489906311, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.998272657394409, "rewards_train/margins_1": 0.3261103630065918, "rewards_train/margins_2": 2.0198816061019897, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -155.38880920410156, "logps_train/policy_1_l": -113.66729736328125, "logps_train/policy_1_w": -90.22880554199219, "logps_train/policy_2_2": -134.2493896484375, "logps_train/policy_2_w": -104.41963195800781, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.6986198425292969, "rewards_train/1-l": -0.32141709327697754, "rewards_train/1-w": 1.0325872898101807, "rewards_train/2-2": 1.3781859874725342, "rewards_train/2-w": 0.9674118757247925, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3540043830871582, "rewards_train/margins_1": 0.3339674472808838, "rewards_train/margins_2": 0.4107741117477417, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -141.2726287841797, "logps_train/policy_1_l": -117.44610595703125, "logps_train/policy_1_w": -116.62324523925781, "logps_train/policy_2_2": -120.60121154785156, "logps_train/policy_2_w": -145.97657775878906, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.593830943107605, "rewards_train/1-l": -0.8000792264938354, "rewards_train/1-w": 1.7361128330230713, "rewards_train/2-2": 1.9515970945358276, "rewards_train/2-w": 0.971092700958252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5361920595169067, "rewards_train/margins_1": 0.1422818899154663, "rewards_train/margins_2": 0.9805043935775757, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -252.33851623535156, "logps_train/policy_1_l": -261.9670715332031, "logps_train/policy_1_w": -167.98489379882812, "logps_train/policy_2_2": -209.4200897216797, "logps_train/policy_2_w": -209.07968139648438, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 2.3536477088928223, "rewards_train/1-l": -3.3838179111480713, "rewards_train/1-w": 2.8061981201171875, "rewards_train/2-2": 3.5970535278320312, "rewards_train/2-w": 1.4639074802398682, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.190016031265259, "rewards_train/margins_1": 0.45255041122436523, "rewards_train/margins_2": 2.133146047592163, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -184.549072265625, "logps_train/policy_1_l": -246.54788208007812, "logps_train/policy_1_w": -146.74981689453125, "logps_train/policy_2_2": -149.75486755371094, "logps_train/policy_2_w": -195.6772003173828, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.2919689416885376, "rewards_train/1-l": -2.876662492752075, "rewards_train/1-w": 2.4656434059143066, "rewards_train/2-2": 2.3198251724243164, "rewards_train/2-w": 1.072904109954834, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.342305898666382, "rewards_train/margins_1": 1.173674464225769, "rewards_train/margins_2": 1.2469210624694824, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -88.26313781738281, "logps_train/policy_1_l": -86.21504211425781, "logps_train/policy_1_w": -49.37386703491211, "logps_train/policy_2_2": -71.30686950683594, "logps_train/policy_2_w": -63.54217529296875, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -57.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -68.0, "rewards_train/1-2": 0.9566946029663086, "rewards_train/1-l": -0.7904495596885681, "rewards_train/1-w": 0.8092929124832153, "rewards_train/2-2": 1.419215202331543, "rewards_train/2-w": 0.47136837244033813, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.5997424721717834, "rewards_train/margins_1": -0.14740169048309326, "rewards_train/margins_2": 0.9478468298912048, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -169.70205688476562, "logps_train/policy_1_l": -141.33566284179688, "logps_train/policy_1_w": -169.29122924804688, "logps_train/policy_2_2": -137.75094604492188, "logps_train/policy_2_w": -207.00494384765625, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.478231430053711, "rewards_train/1-l": -0.8085672855377197, "rewards_train/1-w": 2.308375835418701, "rewards_train/2-2": 2.479593276977539, "rewards_train/2-w": 1.187004566192627, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.116943120956421, "rewards_train/margins_1": 0.8301444053649902, "rewards_train/margins_2": 1.292588710784912, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -153.8426513671875, "logps_train/policy_1_l": -152.91510009765625, "logps_train/policy_1_w": -121.03929138183594, "logps_train/policy_2_2": -136.30197143554688, "logps_train/policy_2_w": -142.87704467773438, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.5989365577697754, "rewards_train/1-l": -1.231353521347046, "rewards_train/1-w": 1.7148213386535645, "rewards_train/2-2": 1.074881672859192, "rewards_train/2-w": 1.2529217004776, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.9461748600006104, "rewards_train/margins_1": 1.115884780883789, "rewards_train/margins_2": -0.1780400276184082, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -179.25575256347656, "logps_train/policy_1_l": -98.20742797851562, "logps_train/policy_1_w": -111.03239440917969, "logps_train/policy_2_2": -148.64491271972656, "logps_train/policy_2_w": -144.90451049804688, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.3994251489639282, "rewards_train/1-l": -0.3854883909225464, "rewards_train/1-w": 1.372542381286621, "rewards_train/2-2": 1.7792582511901855, "rewards_train/2-w": 0.6165798902511597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7580307722091675, "rewards_train/margins_1": -0.02688276767730713, "rewards_train/margins_2": 1.1626783609390259, "step": 97 }, { "epoch": 0.29, "learning_rate": 4.876110226850278e-06, "loss": 0.8421, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -101.2298583984375, "logps_train/policy_1_l": -95.96087646484375, "logps_train/policy_1_w": -61.710689544677734, "logps_train/policy_2_2": -84.55521392822266, "logps_train/policy_2_w": -77.83987426757812, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.2660768032073975, "rewards_train/1-l": -0.885931670665741, "rewards_train/1-w": 0.9898684620857239, "rewards_train/2-2": 1.5160608291625977, "rewards_train/2-w": 0.6738253235816956, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.8758001327514648, "rewards_train/margins_1": -0.2762083411216736, "rewards_train/margins_2": 0.8422355055809021, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -203.70367431640625, "logps_train/policy_1_l": -217.0660400390625, "logps_train/policy_1_w": -129.73069763183594, "logps_train/policy_2_2": -167.98837280273438, "logps_train/policy_2_w": -159.16876220703125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.0608829259872437, "rewards_train/1-l": -2.013636589050293, "rewards_train/1-w": 2.136305332183838, "rewards_train/2-2": 2.398036479949951, "rewards_train/2-w": 1.4081242084503174, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.149941921234131, "rewards_train/margins_1": 1.0754224061965942, "rewards_train/margins_2": 0.9899122714996338, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -170.99888610839844, "logps_train/policy_1_l": -165.51951599121094, "logps_train/policy_1_w": -142.26602172851562, "logps_train/policy_2_2": -134.819580078125, "logps_train/policy_2_w": -172.49444580078125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.4563617706298828, "rewards_train/1-l": -0.916795015335083, "rewards_train/1-w": 2.5007429122924805, "rewards_train/2-2": 2.0352299213409424, "rewards_train/2-w": 1.4552435874938965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4175379276275635, "rewards_train/margins_1": 1.0443811416625977, "rewards_train/margins_2": 0.5799863338470459, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -143.5020751953125, "logps_train/policy_1_l": -145.74789428710938, "logps_train/policy_1_w": -123.59202575683594, "logps_train/policy_2_2": -129.08526611328125, "logps_train/policy_2_w": -147.08279418945312, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.2562392950057983, "rewards_train/1-l": -0.6520353555679321, "rewards_train/1-w": 1.8876726627349854, "rewards_train/2-2": 1.4932308197021484, "rewards_train/2-w": 1.307344675064087, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5397080183029175, "rewards_train/margins_1": 0.631433367729187, "rewards_train/margins_2": 0.18588614463806152, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -90.64564514160156, "logps_train/policy_1_l": -49.04697036743164, "logps_train/policy_1_w": -118.64865112304688, "logps_train/policy_2_2": -74.37993621826172, "logps_train/policy_2_w": -144.82913208007812, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -48.25, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -86.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.9198108911514282, "rewards_train/1-l": -0.07950162887573242, "rewards_train/1-w": 1.9246853590011597, "rewards_train/2-2": 1.2112252712249756, "rewards_train/2-w": 1.2018510103225708, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.004186987876892, "rewards_train/margins_1": 1.0048744678497314, "rewards_train/margins_2": 0.009374260902404785, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -190.1272735595703, "logps_train/policy_1_l": -167.38320922851562, "logps_train/policy_1_w": -154.65406799316406, "logps_train/policy_2_2": -161.33221435546875, "logps_train/policy_2_w": -182.932861328125, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.6278972625732422, "rewards_train/1-l": -1.7926175594329834, "rewards_train/1-w": 3.0950419902801514, "rewards_train/2-2": 2.383185386657715, "rewards_train/2-w": 2.339527130126953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.887659549713135, "rewards_train/margins_1": 1.4671447277069092, "rewards_train/margins_2": 0.04365825653076172, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -140.40928649902344, "logps_train/policy_1_l": -86.33700561523438, "logps_train/policy_1_w": -75.39697265625, "logps_train/policy_2_2": -118.02181243896484, "logps_train/policy_2_w": -92.196044921875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.9434456825256348, "rewards_train/1-l": -0.7478116750717163, "rewards_train/1-w": 1.0601567029953003, "rewards_train/2-2": 2.517350196838379, "rewards_train/2-w": 0.49150338768959045, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8079683780670166, "rewards_train/margins_1": -0.8832889795303345, "rewards_train/margins_2": 2.0258468091487885, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -106.37049865722656, "logps_train/policy_1_l": -76.06101989746094, "logps_train/policy_1_w": -96.91753387451172, "logps_train/policy_2_2": -89.29917907714844, "logps_train/policy_2_w": -116.27336883544922, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.3106064796447754, "rewards_train/1-l": -0.7570782899856567, "rewards_train/1-w": 1.5512151718139648, "rewards_train/2-2": 1.531019926071167, "rewards_train/2-w": 0.9625066518783569, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3082934617996216, "rewards_train/margins_1": 0.24060869216918945, "rewards_train/margins_2": 0.5685132741928101, "step": 98 }, { "epoch": 0.3, "logps_train/policy_1_2": -138.5452423095703, "logps_train/policy_1_l": -183.00851440429688, "logps_train/policy_1_w": -124.63524627685547, "logps_train/policy_2_2": -113.41879272460938, "logps_train/policy_2_w": -159.13531494140625, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.6392265558242798, "rewards_train/1-l": -2.890645742416382, "rewards_train/1-w": 1.9302259683609009, "rewards_train/2-2": 2.208120822906494, "rewards_train/2-w": 1.2427196502685547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.820871710777283, "rewards_train/margins_1": 0.2909994125366211, "rewards_train/margins_2": 0.9654011726379395, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -172.4078826904297, "logps_train/policy_1_l": -169.27493286132812, "logps_train/policy_1_w": -138.54129028320312, "logps_train/policy_2_2": -151.90480041503906, "logps_train/policy_2_w": -171.03057861328125, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.2631185054779053, "rewards_train/1-l": -0.33764976263046265, "rewards_train/1-w": 1.6904008388519287, "rewards_train/2-2": 1.7825671434402466, "rewards_train/2-w": 0.9883471727371216, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0280506014823914, "rewards_train/margins_1": 0.42728233337402344, "rewards_train/margins_2": 0.794219970703125, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -180.30551147460938, "logps_train/policy_1_l": -200.04165649414062, "logps_train/policy_1_w": -134.51263427734375, "logps_train/policy_2_2": -159.43502807617188, "logps_train/policy_2_w": -161.54783630371094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.750698447227478, "rewards_train/1-l": -1.7752585411071777, "rewards_train/1-w": 1.5909245014190674, "rewards_train/2-2": 2.1463401317596436, "rewards_train/2-w": 1.1030285358428955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.366183042526245, "rewards_train/margins_1": -0.15977394580841064, "rewards_train/margins_2": 1.043311595916748, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -192.97486877441406, "logps_train/policy_1_l": -177.51177978515625, "logps_train/policy_1_w": -166.93377685546875, "logps_train/policy_2_2": -165.00360107421875, "logps_train/policy_2_w": -193.38978576660156, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.354857325553894, "rewards_train/1-l": -1.3808650970458984, "rewards_train/1-w": 2.155059337615967, "rewards_train/2-2": 2.0328428745269775, "rewards_train/2-w": 1.696958303451538, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5359244346618652, "rewards_train/margins_1": 0.8002020120620728, "rewards_train/margins_2": 0.33588457107543945, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -64.76074981689453, "logps_train/policy_1_l": -67.89387512207031, "logps_train/policy_1_w": -63.91312026977539, "logps_train/policy_2_2": -50.98571014404297, "logps_train/policy_2_w": -73.41145324707031, "logps_train/ref_1_2": -69.0, "logps_train/ref_1_l": -60.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -60.25, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.4459952712059021, "rewards_train/1-l": -0.7928051352500916, "rewards_train/1-w": 0.7985316514968872, "rewards_train/2-2": 0.9241828918457031, "rewards_train/2-w": 0.5315114259719849, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.5913367867469788, "rewards_train/margins_1": 0.3525363802909851, "rewards_train/margins_2": 0.39267146587371826, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -231.45291137695312, "logps_train/policy_1_l": -199.5797882080078, "logps_train/policy_1_w": -184.03860473632812, "logps_train/policy_2_2": -203.23291015625, "logps_train/policy_2_w": -219.34439086914062, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 0.9515836834907532, "rewards_train/1-l": -1.9771192073822021, "rewards_train/1-w": 1.8519995212554932, "rewards_train/2-2": 1.7821778059005737, "rewards_train/2-w": 0.8936865925788879, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8291187286376953, "rewards_train/margins_1": 0.90041583776474, "rewards_train/margins_2": 0.8884912133216858, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -173.05654907226562, "logps_train/policy_1_l": -182.22952270507812, "logps_train/policy_1_w": -177.31761169433594, "logps_train/policy_2_2": -145.98165893554688, "logps_train/policy_2_w": -215.2567138671875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.0599695444107056, "rewards_train/1-l": -1.060452938079834, "rewards_train/1-w": 2.168238878250122, "rewards_train/2-2": 1.6862092018127441, "rewards_train/2-w": 1.3993287086486816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.228691816329956, "rewards_train/margins_1": 1.1082693338394165, "rewards_train/margins_2": 0.2868804931640625, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -111.3951644897461, "logps_train/policy_1_l": -93.82938385009766, "logps_train/policy_1_w": -71.453857421875, "logps_train/policy_2_2": -94.72962951660156, "logps_train/policy_2_w": -88.76439666748047, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.1819674968719482, "rewards_train/1-l": -0.6660438776016235, "rewards_train/1-w": 1.494458556175232, "rewards_train/2-2": 1.4501817226409912, "rewards_train/2-w": 1.0007083415985107, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1605024337768555, "rewards_train/margins_1": 0.3124910593032837, "rewards_train/margins_2": 0.44947338104248047, "step": 99 }, { "epoch": 0.3, "learning_rate": 4.868315884635479e-06, "loss": 0.9277, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -171.24234008789062, "logps_train/policy_1_l": -133.76638793945312, "logps_train/policy_1_w": -156.0033416748047, "logps_train/policy_2_2": -150.68392944335938, "logps_train/policy_2_w": -183.29653930664062, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.7593601942062378, "rewards_train/1-l": -0.5560812950134277, "rewards_train/1-w": 2.295954704284668, "rewards_train/2-2": 2.1597321033477783, "rewards_train/2-w": 1.5185878276824951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8520359992980957, "rewards_train/margins_1": 0.5365945100784302, "rewards_train/margins_2": 0.6411442756652832, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -218.10781860351562, "logps_train/policy_1_l": -210.6756591796875, "logps_train/policy_1_w": -225.43206787109375, "logps_train/policy_2_2": -188.35147094726562, "logps_train/policy_2_w": -267.4412536621094, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.8670997619628906, "rewards_train/1-l": -1.6972532272338867, "rewards_train/1-w": 3.1159729957580566, "rewards_train/2-2": 2.7678794860839844, "rewards_train/2-w": 2.130680561065674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.813226222991943, "rewards_train/margins_1": 1.248873233795166, "rewards_train/margins_2": 0.6371989250183105, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -221.2291717529297, "logps_train/policy_1_l": -232.802734375, "logps_train/policy_1_w": -186.91400146484375, "logps_train/policy_2_2": -186.01800537109375, "logps_train/policy_2_w": -212.8435516357422, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.514583110809326, "rewards_train/1-l": -1.505272626876831, "rewards_train/1-w": 2.621100664138794, "rewards_train/2-2": 3.2231991291046143, "rewards_train/2-w": 1.978144645690918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.126373291015625, "rewards_train/margins_1": 0.10651755332946777, "rewards_train/margins_2": 1.2450544834136963, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -163.47097778320312, "logps_train/policy_1_l": -223.6357421875, "logps_train/policy_1_w": -157.44491577148438, "logps_train/policy_2_2": -138.01651000976562, "logps_train/policy_2_w": -194.56637573242188, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.024778127670288, "rewards_train/1-l": -1.8975586891174316, "rewards_train/1-w": 1.9406636953353882, "rewards_train/2-2": 2.4983487129211426, "rewards_train/2-w": 0.8855506181716919, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.83822238445282, "rewards_train/margins_1": -0.0841144323348999, "rewards_train/margins_2": 1.6127980947494507, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -213.57272338867188, "logps_train/policy_1_l": -247.02871704101562, "logps_train/policy_1_w": -221.9645538330078, "logps_train/policy_2_2": -187.61819458007812, "logps_train/policy_2_w": -267.60931396484375, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 1.958353042602539, "rewards_train/1-l": -2.6200594902038574, "rewards_train/1-w": 2.9269814491271973, "rewards_train/2-2": 2.3256795406341553, "rewards_train/2-w": 1.7578179836273193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.547040939331055, "rewards_train/margins_1": 0.9686284065246582, "rewards_train/margins_2": 0.5678615570068359, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -202.739013671875, "logps_train/policy_1_l": -164.525146484375, "logps_train/policy_1_w": -147.62338256835938, "logps_train/policy_2_2": -158.62876892089844, "logps_train/policy_2_w": -191.893798828125, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.929223358631134, "rewards_train/1-l": -1.0322022438049316, "rewards_train/1-w": 1.786098599433899, "rewards_train/2-2": 2.130872964859009, "rewards_train/2-w": 0.6074947118759155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8183008432388306, "rewards_train/margins_1": 0.8568752408027649, "rewards_train/margins_2": 1.5233782529830933, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -165.77713012695312, "logps_train/policy_1_l": -197.527587890625, "logps_train/policy_1_w": -136.86068725585938, "logps_train/policy_2_2": -142.76535034179688, "logps_train/policy_2_w": -167.88868713378906, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.1277549266815186, "rewards_train/1-l": -2.539283037185669, "rewards_train/1-w": 2.344789981842041, "rewards_train/2-2": 1.8564720153808594, "rewards_train/2-w": 1.5095692873001099, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.88407301902771, "rewards_train/margins_1": 1.2170350551605225, "rewards_train/margins_2": 0.3469027280807495, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -221.4094696044922, "logps_train/policy_1_l": -173.728515625, "logps_train/policy_1_w": -94.62345123291016, "logps_train/policy_2_2": -173.43682861328125, "logps_train/policy_2_w": -127.07108306884766, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9879588484764099, "rewards_train/1-l": -1.5916035175323486, "rewards_train/1-w": 1.7114825248718262, "rewards_train/2-2": 2.635613441467285, "rewards_train/2-w": 1.0901577472686768, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.303086042404175, "rewards_train/margins_1": 0.7235236763954163, "rewards_train/margins_2": 1.5454556941986084, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -80.76692962646484, "logps_train/policy_1_l": -91.05104064941406, "logps_train/policy_1_w": -69.08924865722656, "logps_train/policy_2_2": -63.75111770629883, "logps_train/policy_2_w": -88.065673828125, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.8209633231163025, "rewards_train/1-l": -1.3594012260437012, "rewards_train/1-w": 1.071934461593628, "rewards_train/2-2": 1.1487162113189697, "rewards_train/2-w": 0.5129637122154236, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.431335687637329, "rewards_train/margins_1": 0.25097113847732544, "rewards_train/margins_2": 0.6357524991035461, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -136.546142578125, "logps_train/policy_1_l": -171.12942504882812, "logps_train/policy_1_w": -117.6683578491211, "logps_train/policy_2_2": -104.59134674072266, "logps_train/policy_2_w": -147.8541259765625, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.244604229927063, "rewards_train/1-l": -1.4094276428222656, "rewards_train/1-w": 1.5034765005111694, "rewards_train/2-2": 1.7518028020858765, "rewards_train/2-w": 1.036463737487793, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.912904143333435, "rewards_train/margins_1": 0.25887227058410645, "rewards_train/margins_2": 0.7153390645980835, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -194.429931640625, "logps_train/policy_1_l": -149.6257781982422, "logps_train/policy_1_w": -134.03482055664062, "logps_train/policy_2_2": -165.35452270507812, "logps_train/policy_2_w": -162.31588745117188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.5820069313049316, "rewards_train/1-l": -0.6711711883544922, "rewards_train/1-w": 2.1058928966522217, "rewards_train/2-2": 2.2207984924316406, "rewards_train/2-w": 1.4621613025665283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.777064085006714, "rewards_train/margins_1": 0.52388596534729, "rewards_train/margins_2": 0.7586371898651123, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -155.72373962402344, "logps_train/policy_1_l": -169.56077575683594, "logps_train/policy_1_w": -136.78195190429688, "logps_train/policy_2_2": -133.03793334960938, "logps_train/policy_2_w": -166.8924102783203, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.8901257514953613, "rewards_train/1-l": -1.066429615020752, "rewards_train/1-w": 2.187429428100586, "rewards_train/2-2": 2.1712074279785156, "rewards_train/2-w": 1.5849781036376953, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.253859043121338, "rewards_train/margins_1": 0.2973036766052246, "rewards_train/margins_2": 0.5862293243408203, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -231.39227294921875, "logps_train/policy_1_l": -228.76083374023438, "logps_train/policy_1_w": -190.61233520507812, "logps_train/policy_2_2": -198.771484375, "logps_train/policy_2_w": -221.04653930664062, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 1.6420224905014038, "rewards_train/1-l": -1.7479596138000488, "rewards_train/1-w": 3.326265335083008, "rewards_train/2-2": 2.8541030883789062, "rewards_train/2-w": 2.145345449447632, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.074224948883057, "rewards_train/margins_1": 1.684242844581604, "rewards_train/margins_2": 0.7087576389312744, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -214.39381408691406, "logps_train/policy_1_l": -193.7045135498047, "logps_train/policy_1_w": -203.52024841308594, "logps_train/policy_2_2": -179.1805419921875, "logps_train/policy_2_w": -244.09144592285156, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.3387432098388672, "rewards_train/1-l": -1.3704512119293213, "rewards_train/1-w": 2.0510997772216797, "rewards_train/2-2": 2.351475954055786, "rewards_train/2-w": 1.1830427646636963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.421550989151001, "rewards_train/margins_1": 0.7123565673828125, "rewards_train/margins_2": 1.1684331893920898, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -96.000244140625, "logps_train/policy_1_l": -52.89857864379883, "logps_train/policy_1_w": -94.62042236328125, "logps_train/policy_2_2": -74.58385467529297, "logps_train/policy_2_w": -136.58297729492188, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -51.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.6665773391723633, "rewards_train/1-l": -0.16808059811592102, "rewards_train/1-w": 2.007293462753296, "rewards_train/2-2": 1.163684606552124, "rewards_train/2-w": 0.9079137444496155, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.175374060869217, "rewards_train/margins_1": 1.3407161235809326, "rewards_train/margins_2": 0.25577086210250854, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -211.22811889648438, "logps_train/policy_1_l": -171.84970092773438, "logps_train/policy_1_w": -154.41517639160156, "logps_train/policy_2_2": -179.59600830078125, "logps_train/policy_2_w": -177.51295471191406, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.9943753480911255, "rewards_train/1-l": -1.1443455219268799, "rewards_train/1-w": 2.1287951469421387, "rewards_train/2-2": 2.5903990268707275, "rewards_train/2-w": 1.4705795049667358, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2731406688690186, "rewards_train/margins_1": 0.13441979885101318, "rewards_train/margins_2": 1.1198195219039917, "step": 101 }, { "epoch": 0.31, "learning_rate": 4.860290398965423e-06, "loss": 0.7308, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -126.62109375, "logps_train/policy_1_l": -128.68685913085938, "logps_train/policy_1_w": -102.62631225585938, "logps_train/policy_2_2": -102.74507904052734, "logps_train/policy_2_w": -128.2974853515625, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.1714842319488525, "rewards_train/1-l": -1.4296233654022217, "rewards_train/1-w": 1.3959627151489258, "rewards_train/2-2": 1.6225625276565552, "rewards_train/2-w": 0.8069701790809631, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8255860805511475, "rewards_train/margins_1": 0.22447848320007324, "rewards_train/margins_2": 0.815592348575592, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -238.74749755859375, "logps_train/policy_1_l": -179.17347717285156, "logps_train/policy_1_w": -132.36257934570312, "logps_train/policy_2_2": -188.0747833251953, "logps_train/policy_2_w": -171.8994903564453, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.347124695777893, "rewards_train/1-l": -0.6919562220573425, "rewards_train/1-w": 2.688741683959961, "rewards_train/2-2": 2.9534599781036377, "rewards_train/2-w": 1.816301703453064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3806979060173035, "rewards_train/margins_1": 1.3416169881820679, "rewards_train/margins_2": 1.1371582746505737, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -185.82260131835938, "logps_train/policy_1_l": -174.54571533203125, "logps_train/policy_1_w": -142.3411865234375, "logps_train/policy_2_2": -145.05690002441406, "logps_train/policy_2_w": -189.1318817138672, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.6083647012710571, "rewards_train/1-l": -1.576446771621704, "rewards_train/1-w": 2.8471312522888184, "rewards_train/2-2": 2.23024845123291, "rewards_train/2-w": 2.018061637878418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.4235780239105225, "rewards_train/margins_1": 1.2387665510177612, "rewards_train/margins_2": 0.2121868133544922, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -117.16500854492188, "logps_train/policy_1_l": -133.44688415527344, "logps_train/policy_1_w": -101.34203338623047, "logps_train/policy_2_2": -99.00772857666016, "logps_train/policy_2_w": -118.2148666381836, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.5772496461868286, "rewards_train/1-l": -1.7306255102157593, "rewards_train/1-w": 1.181421160697937, "rewards_train/2-2": 1.869539737701416, "rewards_train/2-w": 0.7894508242607117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9120466709136963, "rewards_train/margins_1": -0.3958284854888916, "rewards_train/margins_2": 1.0800889134407043, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -68.05408477783203, "logps_train/policy_1_l": -66.55864715576172, "logps_train/policy_1_w": -54.36375427246094, "logps_train/policy_2_2": -52.496063232421875, "logps_train/policy_2_w": -72.8355484008789, "logps_train/ref_1_2": -76.5, "logps_train/ref_1_l": -60.5, "logps_train/ref_1_w": -64.5, "logps_train/ref_2_2": -62.75, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": 0.8676382899284363, "rewards_train/1-l": -0.6032276749610901, "rewards_train/1-w": 1.0417497158050537, "rewards_train/2-2": 1.0382845401763916, "rewards_train/2-w": 0.7766015529632568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.6449773907661438, "rewards_train/margins_1": 0.17411142587661743, "rewards_train/margins_2": 0.26168298721313477, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -199.53585815429688, "logps_train/policy_1_l": -135.79031372070312, "logps_train/policy_1_w": -162.20306396484375, "logps_train/policy_2_2": -171.02658081054688, "logps_train/policy_2_w": -196.88214111328125, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 0.9581331014633179, "rewards_train/1-l": -1.4579384326934814, "rewards_train/1-w": 2.2437567710876465, "rewards_train/2-2": 1.6883578300476074, "rewards_train/2-w": 1.398505449295044, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.701695203781128, "rewards_train/margins_1": 1.2856236696243286, "rewards_train/margins_2": 0.2898523807525635, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -237.71299743652344, "logps_train/policy_1_l": -228.0072784423828, "logps_train/policy_1_w": -176.80035400390625, "logps_train/policy_2_2": -185.072998046875, "logps_train/policy_2_w": -211.87448120117188, "logps_train/ref_1_2": -251.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.3583879470825195, "rewards_train/1-l": -2.0207467079162598, "rewards_train/1-w": 1.7293397188186646, "rewards_train/2-2": 2.1872310638427734, "rewards_train/2-w": 1.0066924095153809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7500864267349243, "rewards_train/margins_1": 0.370951771736145, "rewards_train/margins_2": 1.1805386543273926, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -157.9370880126953, "logps_train/policy_1_l": -133.22445678710938, "logps_train/policy_1_w": -118.8533935546875, "logps_train/policy_2_2": -126.88902282714844, "logps_train/policy_2_w": -147.48330688476562, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.6961354613304138, "rewards_train/1-l": -1.3353357315063477, "rewards_train/1-w": 2.87091064453125, "rewards_train/2-2": 1.720472812652588, "rewards_train/2-w": 2.036044120788574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 4.206246376037598, "rewards_train/margins_1": 2.174775183200836, "rewards_train/margins_2": -0.31557130813598633, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -140.0482940673828, "logps_train/policy_1_l": -140.72389221191406, "logps_train/policy_1_w": -104.71565246582031, "logps_train/policy_2_2": -127.54312133789062, "logps_train/policy_2_w": -126.790771484375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.6267136335372925, "rewards_train/1-l": -1.2790305614471436, "rewards_train/1-w": 1.5654470920562744, "rewards_train/2-2": 1.9267425537109375, "rewards_train/2-w": 0.8529541492462158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.844477653503418, "rewards_train/margins_1": -0.061266541481018066, "rewards_train/margins_2": 1.0737884044647217, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -104.24332427978516, "logps_train/policy_1_l": -114.45702362060547, "logps_train/policy_1_w": -68.68975830078125, "logps_train/policy_2_2": -76.70861053466797, "logps_train/policy_2_w": -90.460693359375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 0.7186369895935059, "rewards_train/1-l": -1.3859367370605469, "rewards_train/1-w": 0.938446044921875, "rewards_train/2-2": 1.4131232500076294, "rewards_train/2-w": 0.6758064031600952, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.324382781982422, "rewards_train/margins_1": 0.21980905532836914, "rewards_train/margins_2": 0.7373168468475342, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -167.03521728515625, "logps_train/policy_1_l": -177.5965118408203, "logps_train/policy_1_w": -192.92478942871094, "logps_train/policy_2_2": -145.19110107421875, "logps_train/policy_2_w": -235.53565979003906, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 1.1027274131774902, "rewards_train/1-l": -2.0190253257751465, "rewards_train/1-w": 2.5606465339660645, "rewards_train/2-2": 1.8465144634246826, "rewards_train/2-w": 1.1136219501495361, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.579671859741211, "rewards_train/margins_1": 1.4579191207885742, "rewards_train/margins_2": 0.7328925132751465, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -90.13388061523438, "logps_train/policy_1_l": -154.4288330078125, "logps_train/policy_1_w": -102.41648864746094, "logps_train/policy_2_2": -74.09165954589844, "logps_train/policy_2_w": -122.91507720947266, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.8108306527137756, "rewards_train/1-l": -2.0110480785369873, "rewards_train/1-w": 1.108546257019043, "rewards_train/2-2": 1.0470839738845825, "rewards_train/2-w": 0.8373990058898926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1195943355560303, "rewards_train/margins_1": 0.29771560430526733, "rewards_train/margins_2": 0.20968496799468994, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -157.52908325195312, "logps_train/policy_1_l": -114.521728515625, "logps_train/policy_1_w": -135.65505981445312, "logps_train/policy_2_2": -135.5796356201172, "logps_train/policy_2_w": -152.57464599609375, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3197475671768188, "rewards_train/1-l": -0.6135014891624451, "rewards_train/1-w": 2.107931613922119, "rewards_train/2-2": 1.9225056171417236, "rewards_train/2-w": 1.5081616640090942, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.721433103084564, "rewards_train/margins_1": 0.7881840467453003, "rewards_train/margins_2": 0.4143439531326294, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -180.85386657714844, "logps_train/policy_1_l": -102.52277374267578, "logps_train/policy_1_w": -118.00260925292969, "logps_train/policy_2_2": -154.40896606445312, "logps_train/policy_2_w": -143.4586181640625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.9677386283874512, "rewards_train/1-l": -1.0460273027420044, "rewards_train/1-w": 2.415755033493042, "rewards_train/2-2": 2.7403531074523926, "rewards_train/2-w": 1.7830442190170288, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4617823362350464, "rewards_train/margins_1": 0.4480164051055908, "rewards_train/margins_2": 0.9573088884353638, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -170.74462890625, "logps_train/policy_1_l": -163.1282958984375, "logps_train/policy_1_w": -148.39093017578125, "logps_train/policy_2_2": -145.56752014160156, "logps_train/policy_2_w": -171.3748016357422, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.019287109375, "rewards_train/1-l": -1.6413460969924927, "rewards_train/1-w": 1.81950044631958, "rewards_train/2-2": 1.6393420696258545, "rewards_train/2-w": 1.5304877758026123, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4608465433120728, "rewards_train/margins_1": 0.8002133369445801, "rewards_train/margins_2": 0.10885429382324219, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -95.53324890136719, "logps_train/policy_1_l": -83.81324005126953, "logps_train/policy_1_w": -95.27536010742188, "logps_train/policy_2_2": -75.35275268554688, "logps_train/policy_2_w": -108.83100128173828, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.2505815923213959, "rewards_train/1-l": -0.9305425882339478, "rewards_train/1-w": 1.1450227499008179, "rewards_train/2-2": 0.6617953777313232, "rewards_train/2-w": 0.9122124314308167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0755653381347656, "rewards_train/margins_1": 0.894441157579422, "rewards_train/margins_2": -0.2504170536994934, "step": 103 }, { "epoch": 0.31, "learning_rate": 4.852034553113364e-06, "loss": 0.8387, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -115.13223266601562, "logps_train/policy_1_l": -119.46109771728516, "logps_train/policy_1_w": -124.36842346191406, "logps_train/policy_2_2": -100.21412658691406, "logps_train/policy_2_w": -134.00186157226562, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.5650972723960876, "rewards_train/1-l": -0.42970362305641174, "rewards_train/1-w": 0.9451886415481567, "rewards_train/2-2": 1.1665757894515991, "rewards_train/2-w": 0.9173916578292847, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.3748922646045685, "rewards_train/margins_1": 0.3800913691520691, "rewards_train/margins_2": 0.24918413162231445, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -185.85427856445312, "logps_train/policy_1_l": -199.02603149414062, "logps_train/policy_1_w": -141.57772827148438, "logps_train/policy_2_2": -157.43801879882812, "logps_train/policy_2_w": -181.1984100341797, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.0294159650802612, "rewards_train/1-l": -2.0963540077209473, "rewards_train/1-w": 2.2961339950561523, "rewards_train/2-2": 1.9118627309799194, "rewards_train/2-w": 1.183283805847168, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3924880027771, "rewards_train/margins_1": 1.2667180299758911, "rewards_train/margins_2": 0.7285789251327515, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -137.32325744628906, "logps_train/policy_1_l": -144.07965087890625, "logps_train/policy_1_w": -109.87481689453125, "logps_train/policy_2_2": -121.25016784667969, "logps_train/policy_2_w": -138.13916015625, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.056736946105957, "rewards_train/1-l": -0.7837457656860352, "rewards_train/1-w": 2.2496275901794434, "rewards_train/2-2": 1.6950026750564575, "rewards_train/2-w": 1.4376466274261475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0333733558654785, "rewards_train/margins_1": 1.1928906440734863, "rewards_train/margins_2": 0.25735604763031006, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -160.6015167236328, "logps_train/policy_1_l": -249.5264892578125, "logps_train/policy_1_w": -166.15618896484375, "logps_train/policy_2_2": -136.00694274902344, "logps_train/policy_2_w": -205.4954376220703, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 0.9675824642181396, "rewards_train/1-l": -1.6401499509811401, "rewards_train/1-w": 1.3197320699691772, "rewards_train/2-2": 1.423133134841919, "rewards_train/2-w": 0.1377604454755783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9598820209503174, "rewards_train/margins_1": 0.3521496057510376, "rewards_train/margins_2": 1.2853726893663406, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -155.20248413085938, "logps_train/policy_1_l": -160.31695556640625, "logps_train/policy_1_w": -136.74893188476562, "logps_train/policy_2_2": -126.89458465576172, "logps_train/policy_2_w": -188.57339477539062, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.3110028505325317, "rewards_train/1-l": -1.139451265335083, "rewards_train/1-w": 1.7719823122024536, "rewards_train/2-2": 2.0964789390563965, "rewards_train/2-w": 0.5926613807678223, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9114335775375366, "rewards_train/margins_1": 0.4609794616699219, "rewards_train/margins_2": 1.5038175582885742, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -108.69397735595703, "logps_train/policy_1_l": -90.16665649414062, "logps_train/policy_1_w": -92.72879028320312, "logps_train/policy_2_2": -94.19754028320312, "logps_train/policy_2_w": -109.14094543457031, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8913447260856628, "rewards_train/1-l": -1.0537750720977783, "rewards_train/1-w": 1.2441133260726929, "rewards_train/2-2": 1.1988002061843872, "rewards_train/2-w": 0.7153980731964111, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.297888398170471, "rewards_train/margins_1": 0.35276859998703003, "rewards_train/margins_2": 0.4834021329879761, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -77.96672821044922, "logps_train/policy_1_l": -60.85206604003906, "logps_train/policy_1_w": -96.41499328613281, "logps_train/policy_2_2": -64.42195129394531, "logps_train/policy_2_w": -113.28984832763672, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -56.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.7345772981643677, "rewards_train/1-l": -0.48676908016204834, "rewards_train/1-w": 0.8053759336471558, "rewards_train/2-2": 0.8890552520751953, "rewards_train/2-w": 0.5335149765014648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.292145013809204, "rewards_train/margins_1": 0.07079863548278809, "rewards_train/margins_2": 0.35554027557373047, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -125.454345703125, "logps_train/policy_1_l": -161.75927734375, "logps_train/policy_1_w": -96.16444396972656, "logps_train/policy_2_2": -91.12019348144531, "logps_train/policy_2_w": -132.92837524414062, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.015503168106079, "rewards_train/1-l": -2.14858341217041, "rewards_train/1-w": 2.163242816925049, "rewards_train/2-2": 1.663761854171753, "rewards_train/2-w": 1.0446622371673584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.311826229095459, "rewards_train/margins_1": 1.1477396488189697, "rewards_train/margins_2": 0.6190996170043945, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -105.75733184814453, "logps_train/policy_1_l": -94.28404998779297, "logps_train/policy_1_w": -114.05778503417969, "logps_train/policy_2_2": -91.10005187988281, "logps_train/policy_2_w": -130.34674072265625, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.0469228029251099, "rewards_train/1-l": -0.5794306397438049, "rewards_train/1-w": 1.2950024604797363, "rewards_train/2-2": 1.199368953704834, "rewards_train/2-w": 0.8237244486808777, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8744331002235413, "rewards_train/margins_1": 0.24807965755462646, "rewards_train/margins_2": 0.3756445050239563, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -138.3819122314453, "logps_train/policy_1_l": -190.71206665039062, "logps_train/policy_1_w": -113.08393859863281, "logps_train/policy_2_2": -117.22770690917969, "logps_train/policy_2_w": -148.39627075195312, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.348527431488037, "rewards_train/1-l": -1.6947414875030518, "rewards_train/1-w": 1.22598135471344, "rewards_train/2-2": 1.6682443618774414, "rewards_train/2-w": 0.6517787575721741, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9207228422164917, "rewards_train/margins_1": -0.12254607677459717, "rewards_train/margins_2": 1.0164656043052673, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -97.59281921386719, "logps_train/policy_1_l": -80.9696044921875, "logps_train/policy_1_w": -145.5546112060547, "logps_train/policy_2_2": -78.84088134765625, "logps_train/policy_2_w": -172.2010498046875, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.41493648290634155, "rewards_train/1-l": -0.2711793780326843, "rewards_train/1-w": 1.904695987701416, "rewards_train/2-2": 0.7877874374389648, "rewards_train/2-w": 0.9767698049545288, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1758753657341003, "rewards_train/margins_1": 1.4897595047950745, "rewards_train/margins_2": -0.18898236751556396, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -185.11024475097656, "logps_train/policy_1_l": -176.63314819335938, "logps_train/policy_1_w": -208.0555877685547, "logps_train/policy_2_2": -154.86196899414062, "logps_train/policy_2_w": -253.2558135986328, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.2327263355255127, "rewards_train/1-l": -1.3676108121871948, "rewards_train/1-w": 1.5678790807724, "rewards_train/2-2": 1.9950520992279053, "rewards_train/2-w": 0.19941860437393188, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9354898929595947, "rewards_train/margins_1": 0.3351527452468872, "rewards_train/margins_2": 1.7956334948539734, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -166.98463439941406, "logps_train/policy_1_l": -113.6181411743164, "logps_train/policy_1_w": -102.90565490722656, "logps_train/policy_2_2": -138.19699096679688, "logps_train/policy_2_w": -126.7099380493164, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.682787299156189, "rewards_train/1-l": -0.6836888790130615, "rewards_train/1-w": 1.1861931085586548, "rewards_train/2-2": 2.155301094055176, "rewards_train/2-w": 0.5917016863822937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.8698819875717163, "rewards_train/margins_1": -0.4965941905975342, "rewards_train/margins_2": 1.563599407672882, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -185.95896911621094, "logps_train/policy_1_l": -185.42477416992188, "logps_train/policy_1_w": -127.61125183105469, "logps_train/policy_2_2": -167.6778564453125, "logps_train/policy_2_w": -140.76087951660156, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.9462904930114746, "rewards_train/1-l": -1.695603847503662, "rewards_train/1-w": 2.6896564960479736, "rewards_train/2-2": 2.608778476715088, "rewards_train/2-w": 2.3395373821258545, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.385260343551636, "rewards_train/margins_1": 0.743366003036499, "rewards_train/margins_2": 0.2692410945892334, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -131.59835815429688, "logps_train/policy_1_l": -182.57821655273438, "logps_train/policy_1_w": -144.31790161132812, "logps_train/policy_2_2": -97.76356506347656, "logps_train/policy_2_w": -182.04949951171875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.8721942901611328, "rewards_train/1-l": -1.8938560485839844, "rewards_train/1-w": 2.2648892402648926, "rewards_train/2-2": 1.7716903686523438, "rewards_train/2-w": 1.2032533884048462, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.158745288848877, "rewards_train/margins_1": 1.3926949501037598, "rewards_train/margins_2": 0.5684369802474976, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -243.46751403808594, "logps_train/policy_1_l": -216.24948120117188, "logps_train/policy_1_w": -176.28042602539062, "logps_train/policy_2_2": -204.94041442871094, "logps_train/policy_2_w": -216.5330047607422, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.2673100233078003, "rewards_train/1-l": -3.1796352863311768, "rewards_train/1-w": 2.553208351135254, "rewards_train/2-2": 2.726661205291748, "rewards_train/2-w": 1.192012071609497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.732843637466431, "rewards_train/margins_1": 1.2858983278274536, "rewards_train/margins_2": 1.534649133682251, "step": 105 }, { "epoch": 0.32, "learning_rate": 4.843549152835303e-06, "loss": 0.9646, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -127.08670043945312, "logps_train/policy_1_l": -133.92929077148438, "logps_train/policy_1_w": -162.540283203125, "logps_train/policy_2_2": -108.61051177978516, "logps_train/policy_2_w": -190.30422973632812, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.250704050064087, "rewards_train/1-l": -1.018320083618164, "rewards_train/1-w": 2.2024171352386475, "rewards_train/2-2": 1.5639485120773315, "rewards_train/2-w": 1.4844214916229248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.2207372188568115, "rewards_train/margins_1": 0.9517130851745605, "rewards_train/margins_2": 0.07952702045440674, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -184.33441162109375, "logps_train/policy_1_l": -222.26315307617188, "logps_train/policy_1_w": -147.4610137939453, "logps_train/policy_2_2": -151.2669219970703, "logps_train/policy_2_w": -175.4582061767578, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.0056208372116089, "rewards_train/1-l": -2.393111228942871, "rewards_train/1-w": 1.8324145078659058, "rewards_train/2-2": 1.7475271224975586, "rewards_train/2-w": 1.1497853994369507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.225525736808777, "rewards_train/margins_1": 0.8267936706542969, "rewards_train/margins_2": 0.5977417230606079, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -128.4837646484375, "logps_train/policy_1_l": -140.94215393066406, "logps_train/policy_1_w": -121.20172119140625, "logps_train/policy_2_2": -99.69497680664062, "logps_train/policy_2_w": -143.46334838867188, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3930293321609497, "rewards_train/1-l": -1.4709728956222534, "rewards_train/1-w": 2.1532654762268066, "rewards_train/2-2": 1.9586281776428223, "rewards_train/2-w": 1.6224148273468018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.62423837184906, "rewards_train/margins_1": 0.7602361440658569, "rewards_train/margins_2": 0.3362133502960205, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -168.9609375, "logps_train/policy_1_l": -158.88272094726562, "logps_train/policy_1_w": -107.37071228027344, "logps_train/policy_2_2": -137.2403564453125, "logps_train/policy_2_w": -123.67048645019531, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.7898433208465576, "rewards_train/1-l": -1.997257113456726, "rewards_train/1-w": 1.2582416534423828, "rewards_train/2-2": 1.936901330947876, "rewards_train/2-w": 0.7235757112503052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.255498766899109, "rewards_train/margins_1": 0.4683983325958252, "rewards_train/margins_2": 1.2133256196975708, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -171.82330322265625, "logps_train/policy_1_l": -188.521240234375, "logps_train/policy_1_w": -111.67107391357422, "logps_train/policy_2_2": -146.04705810546875, "logps_train/policy_2_w": -137.56622314453125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 0.8739199638366699, "rewards_train/1-l": -2.237670421600342, "rewards_train/1-w": 1.6453930139541626, "rewards_train/2-2": 1.572638750076294, "rewards_train/2-w": 0.9238476753234863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8830634355545044, "rewards_train/margins_1": 0.7714730501174927, "rewards_train/margins_2": 0.6487910747528076, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -204.7787322998047, "logps_train/policy_1_l": -164.51168823242188, "logps_train/policy_1_w": -124.65611267089844, "logps_train/policy_2_2": -163.5192413330078, "logps_train/policy_2_w": -160.4935302734375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.8705639839172363, "rewards_train/1-l": -1.379488229751587, "rewards_train/1-w": 1.7170056104660034, "rewards_train/2-2": 3.1847939491271973, "rewards_train/2-w": 0.852892279624939, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0964938402175903, "rewards_train/margins_1": -0.1535583734512329, "rewards_train/margins_2": 2.3319016695022583, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -98.19532775878906, "logps_train/policy_1_l": -144.99197387695312, "logps_train/policy_1_w": -107.8812255859375, "logps_train/policy_2_2": -85.27252960205078, "logps_train/policy_2_w": -125.36578369140625, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.6517565250396729, "rewards_train/1-l": -1.1150180101394653, "rewards_train/1-w": 1.5626585483551025, "rewards_train/2-2": 1.862688660621643, "rewards_train/2-w": 1.0235779285430908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.677676558494568, "rewards_train/margins_1": -0.08909797668457031, "rewards_train/margins_2": 0.8391107320785522, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -114.64327239990234, "logps_train/policy_1_l": -74.79237365722656, "logps_train/policy_1_w": -125.14691162109375, "logps_train/policy_2_2": -98.48406982421875, "logps_train/policy_2_w": -143.39361572265625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.1208291053771973, "rewards_train/1-l": -0.3622453212738037, "rewards_train/1-w": 1.78609037399292, "rewards_train/2-2": 1.2187801599502563, "rewards_train/2-w": 1.3457934856414795, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1483356952667236, "rewards_train/margins_1": 0.6652612686157227, "rewards_train/margins_2": -0.12701332569122314, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -236.59103393554688, "logps_train/policy_1_l": -285.1076354980469, "logps_train/policy_1_w": -192.64593505859375, "logps_train/policy_2_2": -200.46493530273438, "logps_train/policy_2_w": -240.56332397460938, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.7112095355987549, "rewards_train/1-l": -2.36857533454895, "rewards_train/1-w": 3.0557193756103516, "rewards_train/2-2": 2.5675692558288574, "rewards_train/2-w": 1.7467912435531616, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.424294710159302, "rewards_train/margins_1": 1.3445098400115967, "rewards_train/margins_2": 0.8207780122756958, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -95.92283630371094, "logps_train/policy_1_l": -142.2745819091797, "logps_train/policy_1_w": -76.56938934326172, "logps_train/policy_2_2": -81.586669921875, "logps_train/policy_2_w": -99.12008666992188, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.0803725719451904, "rewards_train/1-l": -1.7965019941329956, "rewards_train/1-w": 1.382514238357544, "rewards_train/2-2": 1.3225833177566528, "rewards_train/2-w": 0.7911163568496704, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1790162324905396, "rewards_train/margins_1": 0.3021416664123535, "rewards_train/margins_2": 0.5314669609069824, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -170.31887817382812, "logps_train/policy_1_l": -131.69082641601562, "logps_train/policy_1_w": -133.50367736816406, "logps_train/policy_2_2": -138.66738891601562, "logps_train/policy_2_w": -170.10244750976562, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.8978005647659302, "rewards_train/1-l": -0.9222069382667542, "rewards_train/1-w": 2.269944906234741, "rewards_train/2-2": 2.9082605838775635, "rewards_train/2-w": 1.4010835886001587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1921518445014954, "rewards_train/margins_1": 0.37214434146881104, "rewards_train/margins_2": 1.5071769952774048, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -124.66462707519531, "logps_train/policy_1_l": -186.75192260742188, "logps_train/policy_1_w": -115.075439453125, "logps_train/policy_2_2": -93.09199523925781, "logps_train/policy_2_w": -150.3649444580078, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0612714290618896, "rewards_train/1-l": -1.7189418077468872, "rewards_train/1-w": 1.6604249477386475, "rewards_train/2-2": 1.8611135482788086, "rewards_train/2-w": 0.6795220971107483, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3793667554855347, "rewards_train/margins_1": 0.5991535186767578, "rewards_train/margins_2": 1.1815914511680603, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -139.86119079589844, "logps_train/policy_1_l": -125.015869140625, "logps_train/policy_1_w": -120.96920776367188, "logps_train/policy_2_2": -120.61075592041016, "logps_train/policy_2_w": -150.0313720703125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.3498179912567139, "rewards_train/1-l": -0.901587188243866, "rewards_train/1-w": 1.9405791759490967, "rewards_train/2-2": 1.9576740264892578, "rewards_train/2-w": 1.096863031387329, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8421663641929626, "rewards_train/margins_1": 0.5907611846923828, "rewards_train/margins_2": 0.8608109951019287, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -157.30581665039062, "logps_train/policy_1_l": -168.989990234375, "logps_train/policy_1_w": -163.38397216796875, "logps_train/policy_2_2": -122.10786437988281, "logps_train/policy_2_w": -205.79832458496094, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.0858259201049805, "rewards_train/1-l": -1.4950926303863525, "rewards_train/1-w": 2.4553513526916504, "rewards_train/2-2": 1.8673381805419922, "rewards_train/2-w": 1.282667875289917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.950443983078003, "rewards_train/margins_1": 1.36952543258667, "rewards_train/margins_2": 0.5846703052520752, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -236.6928253173828, "logps_train/policy_1_l": -225.017333984375, "logps_train/policy_1_w": -155.70748901367188, "logps_train/policy_2_2": -193.28610229492188, "logps_train/policy_2_w": -193.68731689453125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.8807177543640137, "rewards_train/1-l": -0.340795636177063, "rewards_train/1-w": 2.230814218521118, "rewards_train/2-2": 2.9526400566101074, "rewards_train/2-w": 1.4265800714492798, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.571609854698181, "rewards_train/margins_1": 1.3500964641571045, "rewards_train/margins_2": 1.5260599851608276, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -121.47186279296875, "logps_train/policy_1_l": -75.84349060058594, "logps_train/policy_1_w": -105.13603210449219, "logps_train/policy_2_2": -91.57364654541016, "logps_train/policy_2_w": -143.16122436523438, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.679376482963562, "rewards_train/1-l": -0.5212634205818176, "rewards_train/1-w": 1.8488969802856445, "rewards_train/2-2": 1.5098230838775635, "rewards_train/2-w": 1.0682517290115356, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.370160400867462, "rewards_train/margins_1": 1.1695204973220825, "rewards_train/margins_2": 0.44157135486602783, "step": 107 }, { "epoch": 0.32, "learning_rate": 4.834835026291348e-06, "loss": 0.8531, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -131.91317749023438, "logps_train/policy_1_l": -144.13308715820312, "logps_train/policy_1_w": -115.81159210205078, "logps_train/policy_2_2": -114.90080261230469, "logps_train/policy_2_w": -141.34512329101562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.6627833247184753, "rewards_train/1-l": -1.0117466449737549, "rewards_train/1-w": 1.361809253692627, "rewards_train/2-2": 1.057185173034668, "rewards_train/2-w": 0.8818932175636292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.373555898666382, "rewards_train/margins_1": 0.6990259289741516, "rewards_train/margins_2": 0.17529195547103882, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -86.97966003417969, "logps_train/policy_1_l": -85.02421569824219, "logps_train/policy_1_w": -94.21315002441406, "logps_train/policy_2_2": -67.04190826416016, "logps_train/policy_2_w": -129.96783447265625, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -74.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.5637525320053101, "rewards_train/1-l": -1.114531397819519, "rewards_train/1-w": 2.423997402191162, "rewards_train/2-2": 1.002058982849121, "rewards_train/2-w": 1.3360286951065063, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.538528800010681, "rewards_train/margins_1": 1.860244870185852, "rewards_train/margins_2": -0.33396971225738525, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -85.58358001708984, "logps_train/policy_1_l": -160.96783447265625, "logps_train/policy_1_w": -77.98274993896484, "logps_train/policy_2_2": -71.8196029663086, "logps_train/policy_2_w": -97.01181030273438, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": 1.0033609867095947, "rewards_train/1-l": -1.8911198377609253, "rewards_train/1-w": 2.4282875061035156, "rewards_train/2-2": 1.227414846420288, "rewards_train/2-w": 1.6519436836242676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.319407343864441, "rewards_train/margins_1": 1.424926519393921, "rewards_train/margins_2": -0.4245288372039795, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -167.5655517578125, "logps_train/policy_1_l": -202.6318359375, "logps_train/policy_1_w": -181.11776733398438, "logps_train/policy_2_2": -143.4803924560547, "logps_train/policy_2_w": -212.34657287597656, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.1403192281723022, "rewards_train/1-l": -1.754004955291748, "rewards_train/1-w": 1.78275465965271, "rewards_train/2-2": 1.6191483736038208, "rewards_train/2-w": 0.9903432130813599, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.536759614944458, "rewards_train/margins_1": 0.6424354314804077, "rewards_train/margins_2": 0.6288051605224609, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -205.97032165527344, "logps_train/policy_1_l": -268.46234130859375, "logps_train/policy_1_w": -144.83251953125, "logps_train/policy_2_2": -173.860595703125, "logps_train/policy_2_w": -178.85223388671875, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.393592119216919, "rewards_train/1-l": -3.2099034786224365, "rewards_train/1-w": 2.23081111907959, "rewards_train/2-2": 2.245190143585205, "rewards_train/2-w": 1.2272756099700928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.440714597702026, "rewards_train/margins_1": 0.8372189998626709, "rewards_train/margins_2": 1.0179145336151123, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -139.72439575195312, "logps_train/policy_1_l": -130.72634887695312, "logps_train/policy_1_w": -139.19406127929688, "logps_train/policy_2_2": -110.55601501464844, "logps_train/policy_2_w": -184.0281982421875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.622874140739441, "rewards_train/1-l": -0.8057394027709961, "rewards_train/1-w": 2.042799949645996, "rewards_train/2-2": 2.2068982124328613, "rewards_train/2-w": 0.7098740339279175, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.848539352416992, "rewards_train/margins_1": 0.4199258089065552, "rewards_train/margins_2": 1.4970241785049438, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -170.79022216796875, "logps_train/policy_1_l": -90.33822631835938, "logps_train/policy_1_w": -83.0033187866211, "logps_train/policy_2_2": -146.7907257080078, "logps_train/policy_2_w": -107.85831451416016, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.281134843826294, "rewards_train/1-l": -0.9238610863685608, "rewards_train/1-w": 1.5555274486541748, "rewards_train/2-2": 1.6678024530410767, "rewards_train/2-w": 0.8176842927932739, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4793885350227356, "rewards_train/margins_1": 0.27439260482788086, "rewards_train/margins_2": 0.8501181602478027, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -199.26254272460938, "logps_train/policy_1_l": -180.63668823242188, "logps_train/policy_1_w": -176.23904418945312, "logps_train/policy_2_2": -159.04135131835938, "logps_train/policy_2_w": -221.03443908691406, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": 1.068276047706604, "rewards_train/1-l": -1.5851528644561768, "rewards_train/1-w": 2.9448461532592773, "rewards_train/2-2": 2.4513328075408936, "rewards_train/2-w": 1.202805995941162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.529999017715454, "rewards_train/margins_1": 1.8765701055526733, "rewards_train/margins_2": 1.2485268115997314, "step": 108 }, { "epoch": 0.33, "logps_train/policy_1_2": -130.10977172851562, "logps_train/policy_1_l": -134.35943603515625, "logps_train/policy_1_w": -122.77364349365234, "logps_train/policy_2_2": -110.31167602539062, "logps_train/policy_2_w": -145.677978515625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.2226159572601318, "rewards_train/1-l": -1.173541784286499, "rewards_train/1-w": 1.8304476737976074, "rewards_train/2-2": 1.8075040578842163, "rewards_train/2-w": 1.0243899822235107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0039894580841064, "rewards_train/margins_1": 0.6078317165374756, "rewards_train/margins_2": 0.7831140756607056, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -156.60459899902344, "logps_train/policy_1_l": -99.02619934082031, "logps_train/policy_1_w": -119.18263244628906, "logps_train/policy_2_2": -130.79910278320312, "logps_train/policy_2_w": -144.16131591796875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.1731336116790771, "rewards_train/1-l": -0.591486930847168, "rewards_train/1-w": 1.7493879795074463, "rewards_train/2-2": 1.9208718538284302, "rewards_train/2-w": 0.8393378853797913, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3408749103546143, "rewards_train/margins_1": 0.5762543678283691, "rewards_train/margins_2": 1.081533968448639, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -153.51458740234375, "logps_train/policy_1_l": -110.02821350097656, "logps_train/policy_1_w": -119.25007629394531, "logps_train/policy_2_2": -137.5675811767578, "logps_train/policy_2_w": -138.743896484375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.9126033782958984, "rewards_train/1-l": -0.363563597202301, "rewards_train/1-w": 1.7941334247589111, "rewards_train/2-2": 2.301054000854492, "rewards_train/2-w": 1.5881098508834839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.157697021961212, "rewards_train/margins_1": -0.1184699535369873, "rewards_train/margins_2": 0.7129441499710083, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -36.05780792236328, "logps_train/policy_1_l": -48.65686798095703, "logps_train/policy_1_w": -46.356353759765625, "logps_train/policy_2_2": -27.901565551757812, "logps_train/policy_2_w": -56.48143005371094, "logps_train/ref_1_2": -41.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -55.5, "logps_train/ref_2_2": -34.5, "logps_train/ref_2_w": -63.5, "rewards_train/1-2": 0.4813283383846283, "rewards_train/1-l": -0.686658501625061, "rewards_train/1-w": 0.9119473695755005, "rewards_train/2-2": 0.6594528555870056, "rewards_train/2-w": 0.6851092576980591, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.5986058712005615, "rewards_train/margins_1": 0.4306190311908722, "rewards_train/margins_2": -0.025656402111053467, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -190.50265502929688, "logps_train/policy_1_l": -191.9660186767578, "logps_train/policy_1_w": -121.39509582519531, "logps_train/policy_2_2": -156.27359008789062, "logps_train/policy_2_w": -150.29470825195312, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.263991355895996, "rewards_train/1-l": -1.7567589282989502, "rewards_train/1-w": 2.2200605869293213, "rewards_train/2-2": 2.558382511138916, "rewards_train/2-w": 1.3422088623046875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9768195152282715, "rewards_train/margins_1": 0.9560692310333252, "rewards_train/margins_2": 1.2161736488342285, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -142.02272033691406, "logps_train/policy_1_l": -129.7003173828125, "logps_train/policy_1_w": -108.53224182128906, "logps_train/policy_2_2": -112.26336669921875, "logps_train/policy_2_w": -146.20538330078125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.633568286895752, "rewards_train/1-l": -1.0274521112442017, "rewards_train/1-w": 1.6979482173919678, "rewards_train/2-2": 1.6372376680374146, "rewards_train/2-w": 0.30094581842422485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7254003286361694, "rewards_train/margins_1": 1.0643799304962158, "rewards_train/margins_2": 1.3362918496131897, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -201.58164978027344, "logps_train/policy_1_l": -170.0113983154297, "logps_train/policy_1_w": -154.53616333007812, "logps_train/policy_2_2": -170.73135375976562, "logps_train/policy_2_w": -181.40023803710938, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.6637098789215088, "rewards_train/1-l": -1.522428274154663, "rewards_train/1-w": 2.1444308757781982, "rewards_train/2-2": 2.7987399101257324, "rewards_train/2-w": 1.4525548219680786, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6668591499328613, "rewards_train/margins_1": 0.48072099685668945, "rewards_train/margins_2": 1.3461850881576538, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -146.91787719726562, "logps_train/policy_1_l": -131.20042419433594, "logps_train/policy_1_w": -139.96429443359375, "logps_train/policy_2_2": -121.29415893554688, "logps_train/policy_2_w": -172.2060546875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.8269632458686829, "rewards_train/1-l": -0.9211165904998779, "rewards_train/1-w": 2.3348209857940674, "rewards_train/2-2": 1.5528104305267334, "rewards_train/2-w": 1.8418934345245361, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.2559375762939453, "rewards_train/margins_1": 1.5078577399253845, "rewards_train/margins_2": -0.28908300399780273, "step": 109 }, { "epoch": 0.33, "learning_rate": 4.825893023964886e-06, "loss": 0.8711, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -203.5108642578125, "logps_train/policy_1_l": -169.53126525878906, "logps_train/policy_1_w": -198.27447509765625, "logps_train/policy_2_2": -163.66690063476562, "logps_train/policy_2_w": -237.56283569335938, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 1.254187822341919, "rewards_train/1-l": -1.076563835144043, "rewards_train/1-w": 2.3288021087646484, "rewards_train/2-2": 2.806062698364258, "rewards_train/2-w": 1.3280913829803467, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4053659439086914, "rewards_train/margins_1": 1.0746142864227295, "rewards_train/margins_2": 1.4779713153839111, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -123.9832763671875, "logps_train/policy_1_l": -121.16059875488281, "logps_train/policy_1_w": -90.09400177001953, "logps_train/policy_2_2": -93.17393493652344, "logps_train/policy_2_w": -120.14006805419922, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.6047977209091187, "rewards_train/1-l": -1.0320391654968262, "rewards_train/1-w": 1.2911853790283203, "rewards_train/2-2": 1.0857317447662354, "rewards_train/2-w": 0.6939038038253784, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3232245445251465, "rewards_train/margins_1": 0.6863876581192017, "rewards_train/margins_2": 0.39182794094085693, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -187.1931610107422, "logps_train/policy_1_l": -154.13816833496094, "logps_train/policy_1_w": -121.33135986328125, "logps_train/policy_2_2": -145.8199462890625, "logps_train/policy_2_w": -163.32574462890625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.9213088154792786, "rewards_train/1-l": -1.5435047149658203, "rewards_train/1-w": 1.9684261083602905, "rewards_train/2-2": 2.260192632675171, "rewards_train/2-w": 0.9049243927001953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.511930823326111, "rewards_train/margins_1": 1.047117292881012, "rewards_train/margins_2": 1.3552682399749756, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -186.65625, "logps_train/policy_1_l": -173.6834716796875, "logps_train/policy_1_w": -156.51617431640625, "logps_train/policy_2_2": -152.83102416992188, "logps_train/policy_2_w": -192.47918701171875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.1820306777954102, "rewards_train/1-l": -1.1765503883361816, "rewards_train/1-w": 2.499946355819702, "rewards_train/2-2": 2.2583038806915283, "rewards_train/2-w": 1.5645802021026611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.676496744155884, "rewards_train/margins_1": 1.317915678024292, "rewards_train/margins_2": 0.6937236785888672, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -103.24783325195312, "logps_train/policy_1_l": -119.18647766113281, "logps_train/policy_1_w": -122.71781158447266, "logps_train/policy_2_2": -80.45576477050781, "logps_train/policy_2_w": -146.69866943359375, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.284200668334961, "rewards_train/1-l": -0.5517532229423523, "rewards_train/1-w": 2.043062686920166, "rewards_train/2-2": 1.77434504032135, "rewards_train/2-w": 1.0418516397476196, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5948159098625183, "rewards_train/margins_1": 0.7588620185852051, "rewards_train/margins_2": 0.7324934005737305, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -208.36053466796875, "logps_train/policy_1_l": -198.9068145751953, "logps_train/policy_1_w": -153.76144409179688, "logps_train/policy_2_2": -171.58721923828125, "logps_train/policy_2_w": -196.14645385742188, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.8264472484588623, "rewards_train/1-l": -1.8500560522079468, "rewards_train/1-w": 1.9941675662994385, "rewards_train/2-2": 1.9850285053253174, "rewards_train/2-w": 0.6087927222251892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8442236185073853, "rewards_train/margins_1": 1.1677203178405762, "rewards_train/margins_2": 1.3762357831001282, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -159.5887451171875, "logps_train/policy_1_l": -249.75973510742188, "logps_train/policy_1_w": -205.30661010742188, "logps_train/policy_2_2": -128.9116668701172, "logps_train/policy_2_w": -265.485595703125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 1.8317502737045288, "rewards_train/1-l": -2.211519479751587, "rewards_train/1-w": 3.627150297164917, "rewards_train/2-2": 2.1150832176208496, "rewards_train/2-w": 1.3483177423477173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.838669776916504, "rewards_train/margins_1": 1.7954000234603882, "rewards_train/margins_2": 0.7667654752731323, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -122.24603271484375, "logps_train/policy_1_l": -147.31517028808594, "logps_train/policy_1_w": -107.39044189453125, "logps_train/policy_2_2": -98.95431518554688, "logps_train/policy_2_w": -140.25953674316406, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.0046931505203247, "rewards_train/1-l": -1.1471174955368042, "rewards_train/1-w": 2.056293249130249, "rewards_train/2-2": 1.444021224975586, "rewards_train/2-w": 1.2855455875396729, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2034107446670532, "rewards_train/margins_1": 1.0516000986099243, "rewards_train/margins_2": 0.15847563743591309, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -121.29611206054688, "logps_train/policy_1_l": -98.52676391601562, "logps_train/policy_1_w": -114.51498413085938, "logps_train/policy_2_2": -96.95631408691406, "logps_train/policy_2_w": -137.33935546875, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.4367951452732086, "rewards_train/1-l": -0.5499416589736938, "rewards_train/1-w": 2.0188143253326416, "rewards_train/2-2": 0.9582746028900146, "rewards_train/2-w": 1.3691890239715576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.5687559843063354, "rewards_train/margins_1": 1.582019180059433, "rewards_train/margins_2": -0.41091442108154297, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -180.9404754638672, "logps_train/policy_1_l": -154.97201538085938, "logps_train/policy_1_w": -119.39537811279297, "logps_train/policy_2_2": -148.43136596679688, "logps_train/policy_2_w": -156.97174072265625, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.0309522151947021, "rewards_train/1-l": -1.2645845413208008, "rewards_train/1-w": 2.0651497840881348, "rewards_train/2-2": 2.088895320892334, "rewards_train/2-w": 1.0887627601623535, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3297343254089355, "rewards_train/margins_1": 1.0341975688934326, "rewards_train/margins_2": 1.0001325607299805, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -133.46026611328125, "logps_train/policy_1_l": -101.74290466308594, "logps_train/policy_1_w": -111.2637939453125, "logps_train/policy_2_2": -107.89582061767578, "logps_train/policy_2_w": -132.70718383789062, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.4961612224578857, "rewards_train/1-l": -0.8649148941040039, "rewards_train/1-w": 1.4908077716827393, "rewards_train/2-2": 2.182293176651001, "rewards_train/2-w": 1.026155710220337, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.355722665786743, "rewards_train/margins_1": -0.005353450775146484, "rewards_train/margins_2": 1.156137466430664, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -178.49990844726562, "logps_train/policy_1_l": -86.56812286376953, "logps_train/policy_1_w": -109.95193481445312, "logps_train/policy_2_2": -152.3375244140625, "logps_train/policy_2_w": -144.0277557373047, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.9343843460083008, "rewards_train/1-l": -0.7155278921127319, "rewards_train/1-w": 1.293184757232666, "rewards_train/2-2": 1.5146844387054443, "rewards_train/2-w": 0.16226308047771454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.008712649345398, "rewards_train/margins_1": 0.35880041122436523, "rewards_train/margins_2": 1.3524213582277298, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -143.13787841796875, "logps_train/policy_1_l": -120.43206787109375, "logps_train/policy_1_w": -111.86956787109375, "logps_train/policy_2_2": -106.87330627441406, "logps_train/policy_2_w": -146.94406127929688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.9065240621566772, "rewards_train/1-l": -0.4760189354419708, "rewards_train/1-w": 1.7208553552627563, "rewards_train/2-2": 1.6798570156097412, "rewards_train/2-w": 0.6540307998657227, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.196874290704727, "rewards_train/margins_1": 0.8143312931060791, "rewards_train/margins_2": 1.0258262157440186, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -155.84339904785156, "logps_train/policy_1_l": -137.269287109375, "logps_train/policy_1_w": -117.7096939086914, "logps_train/policy_2_2": -124.88811492919922, "logps_train/policy_2_w": -147.86181640625, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.5297222137451172, "rewards_train/1-l": -1.2278308868408203, "rewards_train/1-w": 1.6414084434509277, "rewards_train/2-2": 2.0705642700195312, "rewards_train/2-w": 1.2040537595748901, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.869239330291748, "rewards_train/margins_1": 0.11168622970581055, "rewards_train/margins_2": 0.8665105104446411, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -119.53640747070312, "logps_train/policy_1_l": -87.57972717285156, "logps_train/policy_1_w": -90.72237396240234, "logps_train/policy_2_2": -97.29052734375, "logps_train/policy_2_w": -104.74909210205078, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.851046621799469, "rewards_train/1-l": -0.6205698847770691, "rewards_train/1-w": 0.9715127944946289, "rewards_train/2-2": 1.3646968603134155, "rewards_train/2-w": 0.6430593729019165, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.592082679271698, "rewards_train/margins_1": 0.12046617269515991, "rewards_train/margins_2": 0.721637487411499, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -136.34683227539062, "logps_train/policy_1_l": -132.27679443359375, "logps_train/policy_1_w": -129.13076782226562, "logps_train/policy_2_2": -119.59565734863281, "logps_train/policy_2_w": -156.29042053222656, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.1973485946655273, "rewards_train/1-l": -0.8292404413223267, "rewards_train/1-w": 1.5103607177734375, "rewards_train/2-2": 1.6779340505599976, "rewards_train/2-w": 0.6412703990936279, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.339601159095764, "rewards_train/margins_1": 0.31301212310791016, "rewards_train/margins_2": 1.0366636514663696, "step": 111 }, { "epoch": 0.34, "learning_rate": 4.816724018579584e-06, "loss": 0.8535, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -221.70306396484375, "logps_train/policy_1_l": -293.8974914550781, "logps_train/policy_1_w": -154.75595092773438, "logps_train/policy_2_2": -176.53306579589844, "logps_train/policy_2_w": -202.30137634277344, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -268.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.8843815326690674, "rewards_train/1-l": -2.6030306816101074, "rewards_train/1-w": 3.455655574798584, "rewards_train/2-2": 3.4107556343078613, "rewards_train/2-w": 2.001112699508667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.058686256408691, "rewards_train/margins_1": 1.5712740421295166, "rewards_train/margins_2": 1.4096429347991943, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -111.45323181152344, "logps_train/policy_1_l": -225.42422485351562, "logps_train/policy_1_w": -131.5424041748047, "logps_train/policy_2_2": -90.35613250732422, "logps_train/policy_2_w": -163.63565063476562, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.0648324489593506, "rewards_train/1-l": -1.5559000968933105, "rewards_train/1-w": 1.5098228454589844, "rewards_train/2-2": 1.1929023265838623, "rewards_train/2-w": 0.5184655785560608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.065722942352295, "rewards_train/margins_1": 0.4449903964996338, "rewards_train/margins_2": 0.6744367480278015, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -181.6224365234375, "logps_train/policy_1_l": -95.53187561035156, "logps_train/policy_1_w": -106.94178771972656, "logps_train/policy_2_2": -145.24700927734375, "logps_train/policy_2_w": -129.5873565673828, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.254944086074829, "rewards_train/1-l": -1.2645152807235718, "rewards_train/1-w": 2.489415168762207, "rewards_train/2-2": 2.712409019470215, "rewards_train/2-w": 1.9756394624710083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.753930449485779, "rewards_train/margins_1": 1.234471082687378, "rewards_train/margins_2": 0.7367695569992065, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -136.85641479492188, "logps_train/policy_1_l": -96.96973419189453, "logps_train/policy_1_w": -129.66192626953125, "logps_train/policy_2_2": -98.79661560058594, "logps_train/policy_2_w": -171.5315399169922, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.06357741355896, "rewards_train/1-l": -0.7014657855033875, "rewards_train/1-w": 2.3369321823120117, "rewards_train/2-2": 2.0797131061553955, "rewards_train/2-w": 1.074971318244934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.038397967815399, "rewards_train/margins_1": 1.2733547687530518, "rewards_train/margins_2": 1.0047417879104614, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -186.10043334960938, "logps_train/policy_1_l": -140.9269256591797, "logps_train/policy_1_w": -138.83021545410156, "logps_train/policy_2_2": -147.25013732910156, "logps_train/policy_2_w": -166.56817626953125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.1962053775787354, "rewards_train/1-l": -1.4608566761016846, "rewards_train/1-w": 2.8433451652526855, "rewards_train/2-2": 3.1827991008758545, "rewards_train/2-w": 2.2400569915771484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.30420184135437, "rewards_train/margins_1": 0.6471397876739502, "rewards_train/margins_2": 0.942742109298706, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -219.92710876464844, "logps_train/policy_1_l": -240.54750061035156, "logps_train/policy_1_w": -162.48007202148438, "logps_train/policy_2_2": -185.13450622558594, "logps_train/policy_2_w": -186.9305419921875, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.7697882652282715, "rewards_train/1-l": -2.0449843406677246, "rewards_train/1-w": 2.3287510871887207, "rewards_train/2-2": 2.386549711227417, "rewards_train/2-w": 1.8139768838882446, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.373735427856445, "rewards_train/margins_1": 0.5589628219604492, "rewards_train/margins_2": 0.5725728273391724, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -85.38292694091797, "logps_train/policy_1_l": -109.8797378540039, "logps_train/policy_1_w": -65.17713928222656, "logps_train/policy_2_2": -67.25174713134766, "logps_train/policy_2_w": -88.78742218017578, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.277722716331482, "rewards_train/1-l": -1.7378759384155273, "rewards_train/1-w": 1.543614387512207, "rewards_train/2-2": 1.7720907926559448, "rewards_train/2-w": 0.9771172404289246, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2814903259277344, "rewards_train/margins_1": 0.2658916711807251, "rewards_train/margins_2": 0.7949735522270203, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -182.79833984375, "logps_train/policy_1_l": -169.86329650878906, "logps_train/policy_1_w": -110.782470703125, "logps_train/policy_2_2": -151.03709411621094, "logps_train/policy_2_w": -137.60533142089844, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6840320825576782, "rewards_train/1-l": -1.110156536102295, "rewards_train/1-w": 1.722534418106079, "rewards_train/2-2": 2.520117998123169, "rewards_train/2-w": 1.0328261852264404, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.832690954208374, "rewards_train/margins_1": 0.03850233554840088, "rewards_train/margins_2": 1.4872918128967285, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -188.5045166015625, "logps_train/policy_1_l": -279.7410583496094, "logps_train/policy_1_w": -129.3558349609375, "logps_train/policy_2_2": -150.42088317871094, "logps_train/policy_2_w": -166.54086303710938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.9401741027832031, "rewards_train/1-l": -3.2744953632354736, "rewards_train/1-w": 1.986292839050293, "rewards_train/2-2": 2.98056697845459, "rewards_train/2-w": 0.9990383386611938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.260788202285767, "rewards_train/margins_1": 0.046118736267089844, "rewards_train/margins_2": 1.981528639793396, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -217.54794311523438, "logps_train/policy_1_l": -171.24282836914062, "logps_train/policy_1_w": -153.8856964111328, "logps_train/policy_2_2": -171.31675720214844, "logps_train/policy_2_w": -196.7755889892578, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 2.380507469177246, "rewards_train/1-l": -0.6249170303344727, "rewards_train/1-w": 1.9967331886291504, "rewards_train/2-2": 3.4356093406677246, "rewards_train/2-w": 0.9709754586219788, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.621650218963623, "rewards_train/margins_1": -0.3837742805480957, "rewards_train/margins_2": 2.464633882045746, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -115.0455322265625, "logps_train/policy_1_l": -97.44364929199219, "logps_train/policy_1_w": -105.53932189941406, "logps_train/policy_2_2": -93.54861450195312, "logps_train/policy_2_w": -130.88938903808594, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.9016964435577393, "rewards_train/1-l": -0.8823161125183105, "rewards_train/1-w": 1.5045037269592285, "rewards_train/2-2": 1.3107643127441406, "rewards_train/2-w": 1.076155424118042, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.386819839477539, "rewards_train/margins_1": 0.6028072834014893, "rewards_train/margins_2": 0.23460888862609863, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -167.0753173828125, "logps_train/policy_1_l": -194.29586791992188, "logps_train/policy_1_w": -168.9200439453125, "logps_train/policy_2_2": -136.55149841308594, "logps_train/policy_2_w": -208.93115234375, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.2924693822860718, "rewards_train/1-l": -2.2538061141967773, "rewards_train/1-w": 2.0954952239990234, "rewards_train/2-2": 2.0475854873657227, "rewards_train/2-w": 1.1381343603134155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.349301338195801, "rewards_train/margins_1": 0.8030258417129517, "rewards_train/margins_2": 0.9094511270523071, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -132.76834106445312, "logps_train/policy_1_l": -183.14920043945312, "logps_train/policy_1_w": -154.0052490234375, "logps_train/policy_2_2": -120.7313003540039, "logps_train/policy_2_w": -171.84329223632812, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 0.8418171405792236, "rewards_train/1-l": -1.547438621520996, "rewards_train/1-w": 1.9024043083190918, "rewards_train/2-2": 1.1767722368240356, "rewards_train/2-w": 1.1508278846740723, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.449842929840088, "rewards_train/margins_1": 1.0605871677398682, "rewards_train/margins_2": 0.02594435214996338, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -196.87103271484375, "logps_train/policy_1_l": -189.2867431640625, "logps_train/policy_1_w": -175.25074768066406, "logps_train/policy_2_2": -161.93060302734375, "logps_train/policy_2_w": -220.50363159179688, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.8886780738830566, "rewards_train/1-l": -1.345863699913025, "rewards_train/1-w": 2.2686753273010254, "rewards_train/2-2": 2.6362364292144775, "rewards_train/2-w": 1.1414332389831543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6145390272140503, "rewards_train/margins_1": 0.37999725341796875, "rewards_train/margins_2": 1.4948031902313232, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -237.64285278320312, "logps_train/policy_1_l": -189.95706176757812, "logps_train/policy_1_w": -109.33525085449219, "logps_train/policy_2_2": -193.51988220214844, "logps_train/policy_2_w": -133.83566284179688, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.3888409435749054, "rewards_train/1-l": -1.5086950063705444, "rewards_train/1-w": 1.8645222187042236, "rewards_train/2-2": 1.4151995182037354, "rewards_train/2-w": 1.3984646797180176, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.373217225074768, "rewards_train/margins_1": 1.4756812751293182, "rewards_train/margins_2": 0.016734838485717773, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -158.26513671875, "logps_train/policy_1_l": -104.96542358398438, "logps_train/policy_1_w": -96.08796691894531, "logps_train/policy_2_2": -128.62086486816406, "logps_train/policy_2_w": -130.279296875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.9887197017669678, "rewards_train/1-l": -0.6676361560821533, "rewards_train/1-w": 2.007218837738037, "rewards_train/2-2": 1.9265854358673096, "rewards_train/2-w": 1.2184557914733887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6748549938201904, "rewards_train/margins_1": 1.0184991359710693, "rewards_train/margins_2": 0.7081296443939209, "step": 113 }, { "epoch": 0.34, "learning_rate": 4.807328905014201e-06, "loss": 0.7703, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -98.34521484375, "logps_train/policy_1_l": -72.00332641601562, "logps_train/policy_1_w": -74.06834411621094, "logps_train/policy_2_2": -81.00005340576172, "logps_train/policy_2_w": -87.3671875, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.0592288970947266, "rewards_train/1-l": -0.5524808764457703, "rewards_train/1-w": 0.804493248462677, "rewards_train/2-2": 1.5078072547912598, "rewards_train/2-w": 0.4656250476837158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3569741249084473, "rewards_train/margins_1": -0.25473564863204956, "rewards_train/margins_2": 1.042182207107544, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -189.88882446289062, "logps_train/policy_1_l": -153.44717407226562, "logps_train/policy_1_w": -130.21481323242188, "logps_train/policy_2_2": -157.80776977539062, "logps_train/policy_2_w": -159.2537841796875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.9408047199249268, "rewards_train/1-l": -0.9962795972824097, "rewards_train/1-w": 1.905080795288086, "rewards_train/2-2": 1.641098976135254, "rewards_train/2-w": 1.1839977502822876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.9013603925704956, "rewards_train/margins_1": 0.9642760753631592, "rewards_train/margins_2": 0.4571012258529663, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -86.09222412109375, "logps_train/policy_1_l": -142.0778045654297, "logps_train/policy_1_w": -149.56277465820312, "logps_train/policy_2_2": -68.24400329589844, "logps_train/policy_2_w": -188.42144775390625, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2814030647277832, "rewards_train/1-l": -1.6019208431243896, "rewards_train/1-w": 2.443723678588867, "rewards_train/2-2": 1.7209126949310303, "rewards_train/2-w": 1.3516061305999756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.045644521713257, "rewards_train/margins_1": 1.162320613861084, "rewards_train/margins_2": 0.3693065643310547, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -99.1773681640625, "logps_train/policy_1_l": -109.99534606933594, "logps_train/policy_1_w": -77.90715026855469, "logps_train/policy_2_2": -77.97157287597656, "logps_train/policy_2_w": -104.20552062988281, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.43279993534088135, "rewards_train/1-l": -0.9368388652801514, "rewards_train/1-w": 1.1979563236236572, "rewards_train/2-2": 0.915245532989502, "rewards_train/2-w": 0.6556199789047241, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1347951889038086, "rewards_train/margins_1": 0.7651563882827759, "rewards_train/margins_2": 0.25962555408477783, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -167.45205688476562, "logps_train/policy_1_l": -173.6669921875, "logps_train/policy_1_w": -117.91423034667969, "logps_train/policy_2_2": -142.33685302734375, "logps_train/policy_2_w": -141.2572021484375, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.5469809770584106, "rewards_train/1-l": -1.8772468566894531, "rewards_train/1-w": 1.6710765361785889, "rewards_train/2-2": 2.2991275787353516, "rewards_train/2-w": 1.0805296897888184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.548323392868042, "rewards_train/margins_1": 0.12409555912017822, "rewards_train/margins_2": 1.2185978889465332, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -106.85348510742188, "logps_train/policy_1_l": -128.29380798339844, "logps_train/policy_1_w": -93.79243469238281, "logps_train/policy_2_2": -83.58580017089844, "logps_train/policy_2_w": -121.09800720214844, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.8716830015182495, "rewards_train/1-l": -1.1342644691467285, "rewards_train/1-w": 1.755131483078003, "rewards_train/2-2": 1.170717477798462, "rewards_train/2-w": 1.2109023332595825, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8893959522247314, "rewards_train/margins_1": 0.8834484815597534, "rewards_train/margins_2": -0.040184855461120605, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -126.28885650634766, "logps_train/policy_1_l": -154.0269775390625, "logps_train/policy_1_w": -126.10009765625, "logps_train/policy_2_2": -112.49977111816406, "logps_train/policy_2_w": -143.3630828857422, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2461142539978027, "rewards_train/1-l": -0.9425411224365234, "rewards_train/1-w": 1.1743656396865845, "rewards_train/2-2": 1.5578348636627197, "rewards_train/2-w": 0.5027550458908081, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.116906762123108, "rewards_train/margins_1": -0.07174861431121826, "rewards_train/margins_2": 1.0550798177719116, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -207.103759765625, "logps_train/policy_1_l": -213.1912384033203, "logps_train/policy_1_w": -165.98779296875, "logps_train/policy_2_2": -164.8193359375, "logps_train/policy_2_w": -227.37452697753906, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.166968822479248, "rewards_train/1-l": -0.04998402297496796, "rewards_train/1-w": 2.548095464706421, "rewards_train/2-2": 1.9727530479431152, "rewards_train/2-w": 0.7695786356925964, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.598079487681389, "rewards_train/margins_1": 1.3811266422271729, "rewards_train/margins_2": 1.2031744122505188, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -128.7781219482422, "logps_train/policy_1_l": -155.8273162841797, "logps_train/policy_1_w": -113.30996704101562, "logps_train/policy_2_2": -103.06753540039062, "logps_train/policy_2_w": -138.3736572265625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.465156078338623, "rewards_train/1-l": -1.3387858867645264, "rewards_train/1-w": 1.2315036058425903, "rewards_train/2-2": 1.7846527099609375, "rewards_train/2-w": 0.8829460740089417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5702894926071167, "rewards_train/margins_1": -0.23365247249603271, "rewards_train/margins_2": 0.9017066359519958, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -110.59523010253906, "logps_train/policy_1_l": -65.40239715576172, "logps_train/policy_1_w": -93.64466094970703, "logps_train/policy_2_2": -89.02855682373047, "logps_train/policy_2_w": -114.26507568359375, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.7342267632484436, "rewards_train/1-l": -0.3746144771575928, "rewards_train/1-w": 1.6042840480804443, "rewards_train/2-2": 0.9768316745758057, "rewards_train/2-w": 0.9609930515289307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.978898525238037, "rewards_train/margins_1": 0.8700572848320007, "rewards_train/margins_2": 0.015838623046875, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -102.07820129394531, "logps_train/policy_1_l": -147.82296752929688, "logps_train/policy_1_w": -105.6551513671875, "logps_train/policy_2_2": -81.36136627197266, "logps_train/policy_2_w": -132.04519653320312, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.202042818069458, "rewards_train/1-l": -0.39430850744247437, "rewards_train/1-w": 1.9923954010009766, "rewards_train/2-2": 1.4188437461853027, "rewards_train/2-w": 1.4896206855773926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.386703908443451, "rewards_train/margins_1": 0.7903525829315186, "rewards_train/margins_2": -0.07077693939208984, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -21.535324096679688, "logps_train/policy_1_l": -11.938358306884766, "logps_train/policy_1_w": -30.590499877929688, "logps_train/policy_2_2": -13.625747680664062, "logps_train/policy_2_w": -43.54043960571289, "logps_train/ref_1_2": -24.75, "logps_train/ref_1_l": -10.25, "logps_train/ref_1_w": -40.0, "logps_train/ref_2_2": -16.625, "logps_train/ref_2_w": -48.0, "rewards_train/1-2": 0.314045786857605, "rewards_train/1-l": -0.17113076150417328, "rewards_train/1-w": 0.945637583732605, "rewards_train/2-2": 0.3018784523010254, "rewards_train/2-w": 0.45689359307289124, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.1167683452367783, "rewards_train/margins_1": 0.631591796875, "rewards_train/margins_2": -0.15501514077186584, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -141.2478790283203, "logps_train/policy_1_l": -92.86476135253906, "logps_train/policy_1_w": -58.118675231933594, "logps_train/policy_2_2": -106.22682189941406, "logps_train/policy_2_w": -79.31409454345703, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.6963056921958923, "rewards_train/1-l": -1.5155773162841797, "rewards_train/1-w": 1.0600075721740723, "rewards_train/2-2": 1.8148176670074463, "rewards_train/2-w": 0.7029657363891602, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.575584888458252, "rewards_train/margins_1": 0.36370187997817993, "rewards_train/margins_2": 1.1118519306182861, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -100.98616027832031, "logps_train/policy_1_l": -117.90518951416016, "logps_train/policy_1_w": -85.41886901855469, "logps_train/policy_2_2": -82.63492584228516, "logps_train/policy_2_w": -105.52073669433594, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.8689625263214111, "rewards_train/1-l": -0.9584875106811523, "rewards_train/1-w": 1.907332181930542, "rewards_train/2-2": 1.3876795768737793, "rewards_train/2-w": 1.4285905361175537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8658196926116943, "rewards_train/margins_1": 1.0383696556091309, "rewards_train/margins_2": -0.040910959243774414, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -141.41966247558594, "logps_train/policy_1_l": -170.85719299316406, "logps_train/policy_1_w": -126.24171447753906, "logps_train/policy_2_2": -114.93978881835938, "logps_train/policy_2_w": -169.2666473388672, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.4017839431762695, "rewards_train/1-l": -1.758181095123291, "rewards_train/1-w": 2.1914544105529785, "rewards_train/2-2": 2.099771499633789, "rewards_train/2-w": 0.8577097654342651, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9496355056762695, "rewards_train/margins_1": 0.789670467376709, "rewards_train/margins_2": 1.242061734199524, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -118.70278930664062, "logps_train/policy_1_l": -196.48741149902344, "logps_train/policy_1_w": -156.03048706054688, "logps_train/policy_2_2": -103.13877868652344, "logps_train/policy_2_w": -183.1428680419922, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.5971035957336426, "rewards_train/1-l": -1.1462023258209229, "rewards_train/1-w": 2.016092300415039, "rewards_train/2-2": 1.6884663105010986, "rewards_train/2-w": 1.1095422506332397, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.162294626235962, "rewards_train/margins_1": 0.4189887046813965, "rewards_train/margins_2": 0.5789240598678589, "step": 115 }, { "epoch": 0.35, "learning_rate": 4.797708600215259e-06, "loss": 0.954, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -116.6022720336914, "logps_train/policy_1_l": -109.40988159179688, "logps_train/policy_1_w": -119.22035217285156, "logps_train/policy_2_2": -94.85506439208984, "logps_train/policy_2_w": -144.00486755371094, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.505398154258728, "rewards_train/1-l": -0.7964560389518738, "rewards_train/1-w": 1.5490577220916748, "rewards_train/2-2": 1.3014074563980103, "rewards_train/2-w": 1.082520604133606, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3455137610435486, "rewards_train/margins_1": 1.0436595678329468, "rewards_train/margins_2": 0.2188868522644043, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -114.74687957763672, "logps_train/policy_1_l": -85.33222961425781, "logps_train/policy_1_w": -103.74481201171875, "logps_train/policy_2_2": -89.24992370605469, "logps_train/policy_2_w": -131.37432861328125, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 0.8792190551757812, "rewards_train/1-l": -0.8830268383026123, "rewards_train/1-w": 1.5747363567352295, "rewards_train/2-2": 1.5671947002410889, "rewards_train/2-w": 0.5570986270904541, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.457763195037842, "rewards_train/margins_1": 0.6955173015594482, "rewards_train/margins_2": 1.0100960731506348, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -145.95272827148438, "logps_train/policy_1_l": -109.35629272460938, "logps_train/policy_1_w": -104.03621673583984, "logps_train/policy_2_2": -121.8403091430664, "logps_train/policy_2_w": -118.3968276977539, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.6937901973724365, "rewards_train/1-l": -0.42391031980514526, "rewards_train/1-w": 1.6950113773345947, "rewards_train/2-2": 2.0878443717956543, "rewards_train/2-w": 1.4503557682037354, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.11892169713974, "rewards_train/margins_1": 0.0012211799621582031, "rewards_train/margins_2": 0.637488603591919, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -145.88418579101562, "logps_train/policy_1_l": -121.02809143066406, "logps_train/policy_1_w": -98.76526641845703, "logps_train/policy_2_2": -121.63308715820312, "logps_train/policy_2_w": -122.07009887695312, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.7522062659263611, "rewards_train/1-l": -0.8512459993362427, "rewards_train/1-w": 1.5172232389450073, "rewards_train/2-2": 1.7241917848587036, "rewards_train/2-w": 0.9258027672767639, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.36846923828125, "rewards_train/margins_1": 0.7650169730186462, "rewards_train/margins_2": 0.7983890175819397, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -120.7373046875, "logps_train/policy_1_l": -205.13717651367188, "logps_train/policy_1_w": -74.34037780761719, "logps_train/policy_2_2": -93.17780303955078, "logps_train/policy_2_w": -95.58084869384766, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 0.7247068285942078, "rewards_train/1-l": -3.215963363647461, "rewards_train/1-w": 1.0667431354522705, "rewards_train/2-2": 1.4572196006774902, "rewards_train/2-w": 0.6130086183547974, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.2827064990997314, "rewards_train/margins_1": 0.34203630685806274, "rewards_train/margins_2": 0.8442109823226929, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -221.38461303710938, "logps_train/policy_1_l": -224.21218872070312, "logps_train/policy_1_w": -184.7220916748047, "logps_train/policy_2_2": -168.13414001464844, "logps_train/policy_2_w": -251.02627563476562, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.1146647930145264, "rewards_train/1-l": -2.3759069442749023, "rewards_train/1-w": 2.5246665477752686, "rewards_train/2-2": 2.649085521697998, "rewards_train/2-w": 0.5817470550537109, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.900573492050171, "rewards_train/margins_1": 1.4100017547607422, "rewards_train/margins_2": 2.067338466644287, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -175.4486083984375, "logps_train/policy_1_l": -214.14630126953125, "logps_train/policy_1_w": -160.3089141845703, "logps_train/policy_2_2": -142.0191650390625, "logps_train/policy_2_w": -197.40225219726562, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.6363890767097473, "rewards_train/1-l": -2.1333813667297363, "rewards_train/1-w": 2.162858486175537, "rewards_train/2-2": 1.5699580907821655, "rewards_train/2-w": 1.2097758054733276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.296239852905273, "rewards_train/margins_1": 1.5264694094657898, "rewards_train/margins_2": 0.3601822853088379, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -179.2683868408203, "logps_train/policy_1_l": -169.22312927246094, "logps_train/policy_1_w": -145.1800537109375, "logps_train/policy_2_2": -158.63772583007812, "logps_train/policy_2_w": -169.92471313476562, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.9684730768203735, "rewards_train/1-l": -1.3204576969146729, "rewards_train/1-w": 2.1976194381713867, "rewards_train/2-2": 2.6081020832061768, "rewards_train/2-w": 1.4137786626815796, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5180771350860596, "rewards_train/margins_1": 0.22914636135101318, "rewards_train/margins_2": 1.1943234205245972, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -86.77964782714844, "logps_train/policy_1_l": -79.21385955810547, "logps_train/policy_1_w": -50.992225646972656, "logps_train/policy_2_2": -76.41360473632812, "logps_train/policy_2_w": -60.99049377441406, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -60.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -68.0, "rewards_train/1-2": 1.115785837173462, "rewards_train/1-l": -0.34912046790122986, "rewards_train/1-w": 0.9601523876190186, "rewards_train/2-2": 1.5828582048416138, "rewards_train/2-w": 0.7118884921073914, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3092728555202484, "rewards_train/margins_1": -0.15563344955444336, "rewards_train/margins_2": 0.8709697127342224, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -216.646240234375, "logps_train/policy_1_l": -222.38470458984375, "logps_train/policy_1_w": -125.57059478759766, "logps_train/policy_2_2": -174.39317321777344, "logps_train/policy_2_w": -165.96514892578125, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.0650641918182373, "rewards_train/1-l": -2.0283141136169434, "rewards_train/1-w": 2.46989369392395, "rewards_train/2-2": 3.1770880222320557, "rewards_train/2-w": 1.252899169921875, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4982078075408936, "rewards_train/margins_1": 0.4048295021057129, "rewards_train/margins_2": 1.9241888523101807, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -137.38198852539062, "logps_train/policy_1_l": -169.0266876220703, "logps_train/policy_1_w": -101.61720275878906, "logps_train/policy_2_2": -110.37625122070312, "logps_train/policy_2_w": -119.02798461914062, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.6407077312469482, "rewards_train/1-l": -2.2588207721710205, "rewards_train/1-w": 1.9349596500396729, "rewards_train/2-2": 2.249483585357666, "rewards_train/2-w": 1.275716781616211, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.193780422210693, "rewards_train/margins_1": 0.2942519187927246, "rewards_train/margins_2": 0.9737668037414551, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -176.90185546875, "logps_train/policy_1_l": -156.36859130859375, "logps_train/policy_1_w": -167.59585571289062, "logps_train/policy_2_2": -141.9099884033203, "logps_train/policy_2_w": -198.81182861328125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.962938666343689, "rewards_train/1-l": -0.7968440055847168, "rewards_train/1-w": 2.243539333343506, "rewards_train/2-2": 2.0308761596679688, "rewards_train/2-w": 1.2485041618347168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0403833389282227, "rewards_train/margins_1": 1.280600666999817, "rewards_train/margins_2": 0.782371997833252, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -184.0050048828125, "logps_train/policy_1_l": -140.2847137451172, "logps_train/policy_1_w": -121.0569839477539, "logps_train/policy_2_2": -162.7633056640625, "logps_train/policy_2_w": -141.7295379638672, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.1338729858398438, "rewards_train/1-l": -1.20757257938385, "rewards_train/1-w": 2.0482077598571777, "rewards_train/2-2": 1.778355360031128, "rewards_train/2-w": 1.486714482307434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.255780339241028, "rewards_train/margins_1": 0.914334774017334, "rewards_train/margins_2": 0.29164087772369385, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -151.8090057373047, "logps_train/policy_1_l": -273.2116394042969, "logps_train/policy_1_w": -113.99618530273438, "logps_train/policy_2_2": -125.86518096923828, "logps_train/policy_2_w": -143.82052612304688, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.1120681762695312, "rewards_train/1-l": -3.0586626529693604, "rewards_train/1-w": 1.6257727146148682, "rewards_train/2-2": 1.880669355392456, "rewards_train/2-w": 1.1460719108581543, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6844353675842285, "rewards_train/margins_1": 0.5137045383453369, "rewards_train/margins_2": 0.7345974445343018, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -166.2523193359375, "logps_train/policy_1_l": -203.8486328125, "logps_train/policy_1_w": -160.1619415283203, "logps_train/policy_2_2": -130.2485809326172, "logps_train/policy_2_w": -195.80770874023438, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.306017518043518, "rewards_train/1-l": -1.3704090118408203, "rewards_train/1-w": 2.239274263381958, "rewards_train/2-2": 2.2188916206359863, "rewards_train/2-w": 0.9584861993789673, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6096832752227783, "rewards_train/margins_1": 0.9332567453384399, "rewards_train/margins_2": 1.260405421257019, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -109.64285278320312, "logps_train/policy_1_l": -125.05415344238281, "logps_train/policy_1_w": -95.64300537109375, "logps_train/policy_2_2": -86.65662384033203, "logps_train/policy_2_w": -119.1531982421875, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.7224336862564087, "rewards_train/1-l": -1.4319779872894287, "rewards_train/1-w": 1.6282771825790405, "rewards_train/2-2": 1.421837329864502, "rewards_train/2-w": 0.9510865211486816, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0602551698684692, "rewards_train/margins_1": 0.9058434963226318, "rewards_train/margins_2": 0.4707508087158203, "step": 117 }, { "epoch": 0.35, "learning_rate": 4.7878640431075466e-06, "loss": 0.8935, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -58.71574020385742, "logps_train/policy_1_l": -97.61549377441406, "logps_train/policy_1_w": -63.28316879272461, "logps_train/policy_2_2": -42.06779479980469, "logps_train/policy_2_w": -87.1553955078125, "logps_train/ref_1_2": -68.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -53.75, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 0.9159260392189026, "rewards_train/1-l": -1.6332049369812012, "rewards_train/1-w": 1.406278133392334, "rewards_train/2-2": 1.1562089920043945, "rewards_train/2-w": 0.6289920210838318, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.039483070373535, "rewards_train/margins_1": 0.4903520941734314, "rewards_train/margins_2": 0.5272169709205627, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -234.172607421875, "logps_train/policy_1_l": -235.76675415039062, "logps_train/policy_1_w": -252.20242309570312, "logps_train/policy_2_2": -188.65835571289062, "logps_train/policy_2_w": -321.16656494140625, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -332.0, "rewards_train/1-2": 1.1858655214309692, "rewards_train/1-l": -2.137319803237915, "rewards_train/1-w": 2.96911358833313, "rewards_train/2-2": 2.416195869445801, "rewards_train/2-w": 1.0852956771850586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.106433391571045, "rewards_train/margins_1": 1.7832480669021606, "rewards_train/margins_2": 1.3309001922607422, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -218.2359619140625, "logps_train/policy_1_l": -190.03123474121094, "logps_train/policy_1_w": -154.86468505859375, "logps_train/policy_2_2": -174.89767456054688, "logps_train/policy_2_w": -193.17648315429688, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.2344117164611816, "rewards_train/1-l": -1.5326154232025146, "rewards_train/1-w": 2.377594470977783, "rewards_train/2-2": 2.630112648010254, "rewards_train/2-w": 1.1307886838912964, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.910209894180298, "rewards_train/margins_1": 1.1431827545166016, "rewards_train/margins_2": 1.4993239641189575, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -145.58755493164062, "logps_train/policy_1_l": -110.59536743164062, "logps_train/policy_1_w": -83.19371795654297, "logps_train/policy_2_2": -112.2449951171875, "logps_train/policy_2_w": -107.8916015625, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.5611672401428223, "rewards_train/1-l": -0.8456692099571228, "rewards_train/1-w": 0.99703449010849, "rewards_train/2-2": 1.7700313329696655, "rewards_train/2-w": 0.7444338798522949, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8427037000656128, "rewards_train/margins_1": 0.4358672499656677, "rewards_train/margins_2": 1.0255974531173706, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -145.04598999023438, "logps_train/policy_1_l": -136.07286071777344, "logps_train/policy_1_w": -119.1220932006836, "logps_train/policy_2_2": -123.81483459472656, "logps_train/policy_2_w": -144.23593139648438, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.4102442264556885, "rewards_train/1-l": -0.8971298336982727, "rewards_train/1-w": 0.8448224067687988, "rewards_train/2-2": 1.7480080127716064, "rewards_train/2-w": 0.11898583173751831, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.7419522404670715, "rewards_train/margins_1": -0.5654218196868896, "rewards_train/margins_2": 1.6290221810340881, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -117.55309295654297, "logps_train/policy_1_l": -204.02406311035156, "logps_train/policy_1_w": -98.99198913574219, "logps_train/policy_2_2": -95.55928039550781, "logps_train/policy_2_w": -116.79977416992188, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.1528939008712769, "rewards_train/1-l": -2.640589714050293, "rewards_train/1-w": 1.2085163593292236, "rewards_train/2-2": 1.7464160919189453, "rewards_train/2-w": 0.7661165595054626, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8491060733795166, "rewards_train/margins_1": 0.05562245845794678, "rewards_train/margins_2": 0.9802995324134827, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -124.39493560791016, "logps_train/policy_1_l": -259.8720703125, "logps_train/policy_1_w": -186.81443786621094, "logps_train/policy_2_2": -103.9005355834961, "logps_train/policy_2_w": -233.8498992919922, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.2105062007904053, "rewards_train/1-l": -2.2778303623199463, "rewards_train/1-w": 2.3716814517974854, "rewards_train/2-2": 1.6201026439666748, "rewards_train/2-w": 1.1462597846984863, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.649511814117432, "rewards_train/margins_1": 1.16117525100708, "rewards_train/margins_2": 0.4738428592681885, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -150.7757568359375, "logps_train/policy_1_l": -150.49569702148438, "logps_train/policy_1_w": -103.64697265625, "logps_train/policy_2_2": -111.17947387695312, "logps_train/policy_2_w": -142.35684204101562, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.6747674942016602, "rewards_train/1-l": -1.6913673877716064, "rewards_train/1-w": 1.9220216274261475, "rewards_train/2-2": 2.535177707672119, "rewards_train/2-w": 0.8361907005310059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.613389015197754, "rewards_train/margins_1": 0.2472541332244873, "rewards_train/margins_2": 1.6989870071411133, "step": 118 }, { "epoch": 0.36, "logps_train/policy_1_2": -213.22315979003906, "logps_train/policy_1_l": -217.2388916015625, "logps_train/policy_1_w": -142.54251098632812, "logps_train/policy_2_2": -164.45970153808594, "logps_train/policy_2_w": -189.58352661132812, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.7214338779449463, "rewards_train/1-l": -1.9239873886108398, "rewards_train/1-w": 1.9995570182800293, "rewards_train/2-2": 2.2243423461914062, "rewards_train/2-w": 0.7951626777648926, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.923544406890869, "rewards_train/margins_1": 1.278123140335083, "rewards_train/margins_2": 1.4291796684265137, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -154.45697021484375, "logps_train/policy_1_l": -145.96359252929688, "logps_train/policy_1_w": -128.00437927246094, "logps_train/policy_2_2": -129.9292449951172, "logps_train/policy_2_w": -154.4351806640625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.7121148705482483, "rewards_train/1-l": -0.7274137139320374, "rewards_train/1-w": 1.5101096630096436, "rewards_train/2-2": 1.5244578123092651, "rewards_train/2-w": 1.0314828157424927, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.237523376941681, "rewards_train/margins_1": 0.7979947924613953, "rewards_train/margins_2": 0.49297499656677246, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -172.4166717529297, "logps_train/policy_1_l": -104.0298843383789, "logps_train/policy_1_w": -63.05683135986328, "logps_train/policy_2_2": -135.78530883789062, "logps_train/policy_2_w": -88.0501480102539, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.6833333373069763, "rewards_train/1-l": -1.408457636833191, "rewards_train/1-w": 1.1240042448043823, "rewards_train/2-2": 1.8292826414108276, "rewards_train/2-w": 0.6856103539466858, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5324618816375732, "rewards_train/margins_1": 0.440670907497406, "rewards_train/margins_2": 1.1436722874641418, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -235.07017517089844, "logps_train/policy_1_l": -158.51620483398438, "logps_train/policy_1_w": -112.71096801757812, "logps_train/policy_2_2": -195.86923217773438, "logps_train/policy_2_w": -144.97592163085938, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.030482530593872, "rewards_train/1-l": -1.690684199333191, "rewards_train/1-w": 2.241403579711914, "rewards_train/2-2": 2.4380764961242676, "rewards_train/2-w": 1.3274074792861938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.932087779045105, "rewards_train/margins_1": 1.210921049118042, "rewards_train/margins_2": 1.1106690168380737, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -152.68447875976562, "logps_train/policy_1_l": -154.365478515625, "logps_train/policy_1_w": -94.45972442626953, "logps_train/policy_2_2": -129.0612335205078, "logps_train/policy_2_w": -126.54789733886719, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.9909279346466064, "rewards_train/1-l": -1.4701406955718994, "rewards_train/1-w": 1.6540271043777466, "rewards_train/2-2": 1.6876271963119507, "rewards_train/2-w": 0.6592733860015869, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.124167799949646, "rewards_train/margins_1": 0.6630991697311401, "rewards_train/margins_2": 1.0283538103103638, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -227.78195190429688, "logps_train/policy_1_l": -204.78143310546875, "logps_train/policy_1_w": -175.33590698242188, "logps_train/policy_2_2": -187.35784912109375, "logps_train/policy_2_w": -212.889892578125, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 0.7085229158401489, "rewards_train/1-l": -1.5828301906585693, "rewards_train/1-w": 2.0617213249206543, "rewards_train/2-2": 2.126716136932373, "rewards_train/2-w": 1.2781970500946045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.6445515155792236, "rewards_train/margins_1": 1.3531984090805054, "rewards_train/margins_2": 0.8485190868377686, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -118.64205932617188, "logps_train/policy_1_l": -109.89100646972656, "logps_train/policy_1_w": -68.612060546875, "logps_train/policy_2_2": -95.3123779296875, "logps_train/policy_2_w": -93.95236206054688, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.2721220254898071, "rewards_train/1-l": -1.2213270664215088, "rewards_train/1-w": 0.9477781057357788, "rewards_train/2-2": 1.667004942893982, "rewards_train/2-w": 0.6367948651313782, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1691051721572876, "rewards_train/margins_1": -0.3243439197540283, "rewards_train/margins_2": 1.0302100777626038, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -107.66779327392578, "logps_train/policy_1_l": -108.00640869140625, "logps_train/policy_1_w": -84.11585998535156, "logps_train/policy_2_2": -88.66177368164062, "logps_train/policy_2_w": -103.97779846191406, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.100407600402832, "rewards_train/1-l": -1.1022038459777832, "rewards_train/1-w": 1.647007703781128, "rewards_train/2-2": 1.8439785242080688, "rewards_train/2-w": 0.9115942716598511, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.749211549758911, "rewards_train/margins_1": 0.5466001033782959, "rewards_train/margins_2": 0.9323842525482178, "step": 119 }, { "epoch": 0.36, "learning_rate": 4.7777961945024834e-06, "loss": 0.9108, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -230.96527099609375, "logps_train/policy_1_l": -145.24200439453125, "logps_train/policy_1_w": -145.23477172851562, "logps_train/policy_2_2": -194.35028076171875, "logps_train/policy_2_w": -171.7847900390625, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.7159728407859802, "rewards_train/1-l": -1.2845515012741089, "rewards_train/1-w": 2.2026937007904053, "rewards_train/2-2": 2.377472400665283, "rewards_train/2-w": 1.5793342590332031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.487245202064514, "rewards_train/margins_1": 1.486720860004425, "rewards_train/margins_2": 0.7981381416320801, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -176.17929077148438, "logps_train/policy_1_l": -171.47305297851562, "logps_train/policy_1_w": -143.7998809814453, "logps_train/policy_2_2": -134.27830505371094, "logps_train/policy_2_w": -196.85557556152344, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.6320716142654419, "rewards_train/1-l": -1.1574610471725464, "rewards_train/1-w": 2.518449544906616, "rewards_train/2-2": 1.6405290365219116, "rewards_train/2-w": 1.2535046339035034, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6759105920791626, "rewards_train/margins_1": 1.8863779306411743, "rewards_train/margins_2": 0.3870244026184082, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -178.7393798828125, "logps_train/policy_1_l": -211.28709411621094, "logps_train/policy_1_w": -142.83648681640625, "logps_train/policy_2_2": -153.29537963867188, "logps_train/policy_2_w": -171.93978881835938, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.3409056663513184, "rewards_train/1-l": -1.4099597930908203, "rewards_train/1-w": 2.1218204498291016, "rewards_train/2-2": 2.0409703254699707, "rewards_train/2-w": 1.195279598236084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.531780242919922, "rewards_train/margins_1": 0.7809147834777832, "rewards_train/margins_2": 0.8456907272338867, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -180.66024780273438, "logps_train/policy_1_l": -193.42794799804688, "logps_train/policy_1_w": -109.95658874511719, "logps_train/policy_2_2": -156.78570556640625, "logps_train/policy_2_w": -139.41912841796875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.002725601196289, "rewards_train/1-l": -2.377951145172119, "rewards_train/1-w": 1.4762160778045654, "rewards_train/2-2": 2.6901800632476807, "rewards_train/2-w": 0.8174624443054199, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8541672229766846, "rewards_train/margins_1": -0.5265095233917236, "rewards_train/margins_2": 1.8727176189422607, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -162.23228454589844, "logps_train/policy_1_l": -128.26121520996094, "logps_train/policy_1_w": -95.95310974121094, "logps_train/policy_2_2": -131.7261505126953, "logps_train/policy_2_w": -125.67842864990234, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9564592838287354, "rewards_train/1-l": -1.1932114362716675, "rewards_train/1-w": 1.9546895027160645, "rewards_train/2-2": 2.1023850440979004, "rewards_train/2-w": 1.201688528060913, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.147900938987732, "rewards_train/margins_1": 0.9982302188873291, "rewards_train/margins_2": 0.9006965160369873, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -198.6962890625, "logps_train/policy_1_l": -203.85360717773438, "logps_train/policy_1_w": -193.40774536132812, "logps_train/policy_2_2": -167.86038208007812, "logps_train/policy_2_w": -240.1962890625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.6866220235824585, "rewards_train/1-l": -1.3525490760803223, "rewards_train/1-w": 2.7943813800811768, "rewards_train/2-2": 2.2670865058898926, "rewards_train/2-w": 1.4334962368011475, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.146930456161499, "rewards_train/margins_1": 1.1077593564987183, "rewards_train/margins_2": 0.8335902690887451, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -157.2635498046875, "logps_train/policy_1_l": -138.35244750976562, "logps_train/policy_1_w": -114.41583251953125, "logps_train/policy_2_2": -121.61176300048828, "logps_train/policy_2_w": -141.58091735839844, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.5173932313919067, "rewards_train/1-l": -1.3211820125579834, "rewards_train/1-w": 1.841228723526001, "rewards_train/2-2": 1.5497605800628662, "rewards_train/2-w": 0.905970573425293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1624107360839844, "rewards_train/margins_1": 1.3238354921340942, "rewards_train/margins_2": 0.6437900066375732, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -186.22874450683594, "logps_train/policy_1_l": -139.3095703125, "logps_train/policy_1_w": -125.83575439453125, "logps_train/policy_2_2": -145.186279296875, "logps_train/policy_2_w": -160.61294555664062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.8255631923675537, "rewards_train/1-l": -0.2903319299221039, "rewards_train/1-w": 0.7238461375236511, "rewards_train/2-2": 1.8266849517822266, "rewards_train/2-w": 0.26565852761268616, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.014178067445755, "rewards_train/margins_1": -0.10171705484390259, "rewards_train/margins_2": 1.5610264241695404, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -97.33975982666016, "logps_train/policy_1_l": -133.3414306640625, "logps_train/policy_1_w": -95.84848022460938, "logps_train/policy_2_2": -79.15106201171875, "logps_train/policy_2_w": -121.02433776855469, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.8228598833084106, "rewards_train/1-l": -1.7409794330596924, "rewards_train/1-w": 1.4128082990646362, "rewards_train/2-2": 1.3104312419891357, "rewards_train/2-w": 0.677253007888794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1537877321243286, "rewards_train/margins_1": 0.5899484157562256, "rewards_train/margins_2": 0.6331782341003418, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -294.79400634765625, "logps_train/policy_1_l": -251.77198791503906, "logps_train/policy_1_w": -195.46978759765625, "logps_train/policy_2_2": -226.8746337890625, "logps_train/policy_2_w": -264.7174072265625, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 0.172160804271698, "rewards_train/1-l": -1.7705600261688232, "rewards_train/1-w": 2.60770845413208, "rewards_train/2-2": 1.885194182395935, "rewards_train/2-w": 0.8516945838928223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.378268480300903, "rewards_train/margins_1": 2.435547649860382, "rewards_train/margins_2": 1.0334995985031128, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -128.7354736328125, "logps_train/policy_1_l": -82.28642272949219, "logps_train/policy_1_w": -123.92762756347656, "logps_train/policy_2_2": -106.79539489746094, "logps_train/policy_2_w": -148.07516479492188, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.99090576171875, "rewards_train/1-l": -0.6143843531608582, "rewards_train/1-w": 2.029893398284912, "rewards_train/2-2": 1.8954601287841797, "rewards_train/2-w": 1.231937050819397, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6442777514457703, "rewards_train/margins_1": 1.038987636566162, "rewards_train/margins_2": 0.6635230779647827, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -121.84236907958984, "logps_train/policy_1_l": -148.37130737304688, "logps_train/policy_1_w": -115.07771301269531, "logps_train/policy_2_2": -104.66331481933594, "logps_train/policy_2_w": -133.1958770751953, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.065763235092163, "rewards_train/1-l": -1.4738487005233765, "rewards_train/1-w": 0.7355874180793762, "rewards_train/2-2": 1.330152988433838, "rewards_train/2-w": 0.26439714431762695, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.2094361186027527, "rewards_train/margins_1": -0.33017581701278687, "rewards_train/margins_2": 1.065755844116211, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -201.79177856445312, "logps_train/policy_1_l": -163.93405151367188, "logps_train/policy_1_w": -170.7311248779297, "logps_train/policy_2_2": -173.53009033203125, "logps_train/policy_2_w": -227.17324829101562, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.4563690423965454, "rewards_train/1-l": -1.5965300798416138, "rewards_train/1-w": 2.3304028511047363, "rewards_train/2-2": 2.2872238159179688, "rewards_train/2-w": 0.5920494794845581, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.92693293094635, "rewards_train/margins_1": 0.8740338087081909, "rewards_train/margins_2": 1.6951743364334106, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -173.32644653320312, "logps_train/policy_1_l": -151.40365600585938, "logps_train/policy_1_w": -123.62504577636719, "logps_train/policy_2_2": -142.06069946289062, "logps_train/policy_2_w": -155.3850555419922, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.7196995615959167, "rewards_train/1-l": -1.4413918256759644, "rewards_train/1-w": 1.4468703269958496, "rewards_train/2-2": 1.5647071599960327, "rewards_train/2-w": 0.9193065762519836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.888262152671814, "rewards_train/margins_1": 0.7271707653999329, "rewards_train/margins_2": 0.6454005837440491, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -127.79105377197266, "logps_train/policy_1_l": -168.8995819091797, "logps_train/policy_1_w": -149.92352294921875, "logps_train/policy_2_2": -113.97521209716797, "logps_train/policy_2_w": -173.7205810546875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.732222318649292, "rewards_train/1-l": -1.5247235298156738, "rewards_train/1-w": 1.6814765930175781, "rewards_train/2-2": 1.87982177734375, "rewards_train/2-w": 0.9412226676940918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.206200122833252, "rewards_train/margins_1": -0.05074572563171387, "rewards_train/margins_2": 0.9385991096496582, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -100.1190185546875, "logps_train/policy_1_l": -76.73616027832031, "logps_train/policy_1_w": -101.78762817382812, "logps_train/policy_2_2": -80.32957458496094, "logps_train/policy_2_w": -125.733642578125, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.0834113359451294, "rewards_train/1-l": -0.692365288734436, "rewards_train/1-w": 1.6399868726730347, "rewards_train/2-2": 1.411183476448059, "rewards_train/2-w": 0.7110103964805603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3323521614074707, "rewards_train/margins_1": 0.5565755367279053, "rewards_train/margins_2": 0.7001730799674988, "step": 121 }, { "epoch": 0.37, "learning_rate": 4.767506037004344e-06, "loss": 0.8856, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -236.47201538085938, "logps_train/policy_1_l": -139.24575805664062, "logps_train/policy_1_w": -108.72935485839844, "logps_train/policy_2_2": -189.36705017089844, "logps_train/policy_2_w": -136.51742553710938, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.6879563331604004, "rewards_train/1-l": -1.3099279403686523, "rewards_train/1-w": 1.3005027770996094, "rewards_train/2-2": 2.317396640777588, "rewards_train/2-w": 0.6076332330703735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6104307174682617, "rewards_train/margins_1": 0.612546443939209, "rewards_train/margins_2": 1.7097634077072144, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -94.01371002197266, "logps_train/policy_1_l": -110.97979736328125, "logps_train/policy_1_w": -67.08879852294922, "logps_train/policy_2_2": -76.78719329833984, "logps_train/policy_2_w": -81.91852569580078, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 1.0626921653747559, "rewards_train/1-l": -1.2966618537902832, "rewards_train/1-w": 1.041510820388794, "rewards_train/2-2": 1.395499348640442, "rewards_train/2-w": 0.8114678859710693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.338172674179077, "rewards_train/margins_1": -0.021181344985961914, "rewards_train/margins_2": 0.5840314626693726, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -165.0946044921875, "logps_train/policy_1_l": -223.73165893554688, "logps_train/policy_1_w": -220.15957641601562, "logps_train/policy_2_2": -141.30027770996094, "logps_train/policy_2_w": -269.6629638671875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 0.6881962418556213, "rewards_train/1-l": -2.212033271789551, "rewards_train/1-w": 3.5324788093566895, "rewards_train/2-2": 1.3104015588760376, "rewards_train/2-w": 1.9508905410766602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.74451208114624, "rewards_train/margins_1": 2.844282567501068, "rewards_train/margins_2": -0.6404889822006226, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -145.0775604248047, "logps_train/policy_1_l": -155.42572021484375, "logps_train/policy_1_w": -94.62744140625, "logps_train/policy_2_2": -116.56800842285156, "logps_train/policy_2_w": -119.90069580078125, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.3961499333381653, "rewards_train/1-l": -1.2402293682098389, "rewards_train/1-w": 1.419286847114563, "rewards_train/2-2": 1.4041366577148438, "rewards_train/2-w": 0.9138362407684326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.659516215324402, "rewards_train/margins_1": 1.0231369137763977, "rewards_train/margins_2": 0.49030041694641113, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -157.05380249023438, "logps_train/policy_1_l": -117.57044219970703, "logps_train/policy_1_w": -158.76605224609375, "logps_train/policy_2_2": -138.79202270507812, "logps_train/policy_2_w": -185.84962463378906, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5727450847625732, "rewards_train/1-l": -0.4302861988544464, "rewards_train/1-w": 2.1460516452789307, "rewards_train/2-2": 2.078610897064209, "rewards_train/2-w": 1.5892558097839355, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.576337844133377, "rewards_train/margins_1": 0.5733065605163574, "rewards_train/margins_2": 0.48935508728027344, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -133.8123779296875, "logps_train/policy_1_l": -150.244140625, "logps_train/policy_1_w": -117.43818664550781, "logps_train/policy_2_2": -112.79190826416016, "logps_train/policy_2_w": -139.52720642089844, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.4828239381313324, "rewards_train/1-l": -0.9580075144767761, "rewards_train/1-w": 2.392118453979492, "rewards_train/2-2": 0.9286214709281921, "rewards_train/2-w": 2.012904405593872, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.0, "rewards_train/margins": 3.3501259684562683, "rewards_train/margins_1": 1.9092945158481598, "rewards_train/margins_2": -1.08428293466568, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -135.95172119140625, "logps_train/policy_1_l": -118.09231567382812, "logps_train/policy_1_w": -99.20369720458984, "logps_train/policy_2_2": -106.07819366455078, "logps_train/policy_2_w": -118.74095916748047, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.879827618598938, "rewards_train/1-l": -0.7919957041740417, "rewards_train/1-w": 1.690567135810852, "rewards_train/2-2": 1.508587121963501, "rewards_train/2-w": 1.0259034633636475, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.482562839984894, "rewards_train/margins_1": 0.8107395172119141, "rewards_train/margins_2": 0.4826836585998535, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -132.20413208007812, "logps_train/policy_1_l": -113.35623931884766, "logps_train/policy_1_w": -163.88999938964844, "logps_train/policy_2_2": -109.59062957763672, "logps_train/policy_2_w": -191.6317138671875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.5670874118804932, "rewards_train/1-l": -0.7295688390731812, "rewards_train/1-w": 1.756312370300293, "rewards_train/2-2": 2.319843292236328, "rewards_train/2-w": 0.6493278741836548, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.485881209373474, "rewards_train/margins_1": 0.1892249584197998, "rewards_train/margins_2": 1.6705154180526733, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -171.81625366210938, "logps_train/policy_1_l": -193.64205932617188, "logps_train/policy_1_w": -154.13796997070312, "logps_train/policy_2_2": -132.99012756347656, "logps_train/policy_2_w": -205.44711303710938, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4351708889007568, "rewards_train/1-l": -1.2503387928009033, "rewards_train/1-w": 1.8447974920272827, "rewards_train/2-2": 2.401963949203491, "rewards_train/2-w": 1.1767723560333252, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.095136284828186, "rewards_train/margins_1": 0.4096266031265259, "rewards_train/margins_2": 1.225191593170166, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -149.510009765625, "logps_train/policy_1_l": -180.8721923828125, "logps_train/policy_1_w": -175.93870544433594, "logps_train/policy_2_2": -118.44100952148438, "logps_train/policy_2_w": -206.39862060546875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.0614984035491943, "rewards_train/1-l": -1.4706177711486816, "rewards_train/1-w": 1.4490985870361328, "rewards_train/2-2": 2.051992893218994, "rewards_train/2-w": 0.517950177192688, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9197163581848145, "rewards_train/margins_1": 0.3876001834869385, "rewards_train/margins_2": 1.5340427160263062, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -138.37765502929688, "logps_train/policy_1_l": -179.6969757080078, "logps_train/policy_1_w": -97.0415267944336, "logps_train/policy_2_2": -104.40571594238281, "logps_train/policy_2_w": -130.61903381347656, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.7809847593307495, "rewards_train/1-l": -1.977705717086792, "rewards_train/1-w": 2.322800636291504, "rewards_train/2-2": 1.7313041687011719, "rewards_train/2-w": 1.6932728290557861, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.300506353378296, "rewards_train/margins_1": 1.5418158769607544, "rewards_train/margins_2": 0.03803133964538574, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -151.95162963867188, "logps_train/policy_1_l": -81.6478271484375, "logps_train/policy_1_w": -104.93354797363281, "logps_train/policy_2_2": -130.5538330078125, "logps_train/policy_2_w": -145.43014526367188, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.6948754787445068, "rewards_train/1-l": -0.1202516183257103, "rewards_train/1-w": 2.107426881790161, "rewards_train/2-2": 1.2621948719024658, "rewards_train/2-w": 0.7749552130699158, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2276785001158714, "rewards_train/margins_1": 1.4125514030456543, "rewards_train/margins_2": 0.48723965883255005, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -129.10354614257812, "logps_train/policy_1_l": -172.37704467773438, "logps_train/policy_1_w": -94.19944763183594, "logps_train/policy_2_2": -99.50186920166016, "logps_train/policy_2_w": -122.64042663574219, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.3009734153747559, "rewards_train/1-l": -2.2431740760803223, "rewards_train/1-w": 1.4284923076629639, "rewards_train/2-2": 2.0712976455688477, "rewards_train/2-w": 0.6773631572723389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.671666383743286, "rewards_train/margins_1": 0.127518892288208, "rewards_train/margins_2": 1.3939344882965088, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -92.57295227050781, "logps_train/policy_1_l": -87.41251373291016, "logps_train/policy_1_w": -152.5692138671875, "logps_train/policy_2_2": -69.3159408569336, "logps_train/policy_2_w": -188.89097595214844, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.2427046298980713, "rewards_train/1-l": -0.8139078617095947, "rewards_train/1-w": 2.471203565597534, "rewards_train/2-2": 1.6370580196380615, "rewards_train/2-w": 0.9015279412269592, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.285111427307129, "rewards_train/margins_1": 1.228498935699463, "rewards_train/margins_2": 0.7355300784111023, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -180.08187866210938, "logps_train/policy_1_l": -199.70721435546875, "logps_train/policy_1_w": -145.62640380859375, "logps_train/policy_2_2": -159.52651977539062, "logps_train/policy_2_w": -179.74533081054688, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.0433743000030518, "rewards_train/1-l": -1.1527520418167114, "rewards_train/1-w": 1.989703893661499, "rewards_train/2-2": 1.4489113092422485, "rewards_train/2-w": 0.6473419666290283, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1424559354782104, "rewards_train/margins_1": 0.9463295936584473, "rewards_train/margins_2": 0.8015693426132202, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -194.1271209716797, "logps_train/policy_1_l": -200.9730682373047, "logps_train/policy_1_w": -195.0105743408203, "logps_train/policy_2_2": -168.23536682128906, "logps_train/policy_2_w": -236.51930236816406, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 0.5775226354598999, "rewards_train/1-l": -1.361857295036316, "rewards_train/1-w": 2.4106616973876953, "rewards_train/2-2": 1.0970690250396729, "rewards_train/2-w": 0.863695502281189, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7725189924240112, "rewards_train/margins_1": 1.8331390619277954, "rewards_train/margins_2": 0.2333735227584839, "step": 123 }, { "epoch": 0.37, "learning_rate": 4.756994574914359e-06, "loss": 0.9644, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -168.7908477783203, "logps_train/policy_1_l": -199.5838623046875, "logps_train/policy_1_w": -168.8044891357422, "logps_train/policy_2_2": -124.43595886230469, "logps_train/policy_2_w": -220.15235900878906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 0.4599783420562744, "rewards_train/1-l": -2.2731332778930664, "rewards_train/1-w": 2.2798056602478027, "rewards_train/2-2": 1.688435435295105, "rewards_train/2-w": 0.8787092566490173, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.552938938140869, "rewards_train/margins_1": 1.8198273181915283, "rewards_train/margins_2": 0.8097261786460876, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -124.14291381835938, "logps_train/policy_1_l": -83.96952819824219, "logps_train/policy_1_w": -53.24641799926758, "logps_train/policy_2_2": -103.65493774414062, "logps_train/policy_2_w": -67.36759948730469, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -62.25, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -74.0, "rewards_train/1-2": -0.11702558398246765, "rewards_train/1-l": -0.6310344934463501, "rewards_train/1-w": 0.908561110496521, "rewards_train/2-2": 0.6028658151626587, "rewards_train/2-w": 0.6894123554229736, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.539595603942871, "rewards_train/margins_1": 1.0255866944789886, "rewards_train/margins_2": -0.08654654026031494, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -76.8313217163086, "logps_train/policy_1_l": -100.2210464477539, "logps_train/policy_1_w": -84.52247619628906, "logps_train/policy_2_2": -60.201995849609375, "logps_train/policy_2_w": -133.12625122070312, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.2309300899505615, "rewards_train/1-l": -0.7506206631660461, "rewards_train/1-w": 1.2196269035339355, "rewards_train/2-2": 1.2923003435134888, "rewards_train/2-w": 0.4240930676460266, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9702475666999817, "rewards_train/margins_1": -0.011303186416625977, "rewards_train/margins_2": 0.8682072758674622, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -136.75985717773438, "logps_train/policy_1_l": -185.63394165039062, "logps_train/policy_1_w": -110.42747497558594, "logps_train/policy_2_2": -113.396240234375, "logps_train/policy_2_w": -134.966552734375, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2287013530731201, "rewards_train/1-l": -1.4813642501831055, "rewards_train/1-w": 1.9205341339111328, "rewards_train/2-2": 2.034595012664795, "rewards_train/2-w": 1.3088133335113525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4018983840942383, "rewards_train/margins_1": 0.6918327808380127, "rewards_train/margins_2": 0.7257816791534424, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -165.8154754638672, "logps_train/policy_1_l": -210.18280029296875, "logps_train/policy_1_w": -143.78372192382812, "logps_train/policy_2_2": -130.75723266601562, "logps_train/policy_2_w": -178.88021850585938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.1450145244598389, "rewards_train/1-l": -1.7198418378829956, "rewards_train/1-w": 1.8638150691986084, "rewards_train/2-2": 2.1848230361938477, "rewards_train/2-w": 1.472916603088379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.583656907081604, "rewards_train/margins_1": 0.7188005447387695, "rewards_train/margins_2": 0.7119064331054688, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -211.17599487304688, "logps_train/policy_1_l": -192.95639038085938, "logps_train/policy_1_w": -140.7854766845703, "logps_train/policy_2_2": -180.875, "logps_train/policy_2_w": -174.50291442871094, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.0933386087417603, "rewards_train/1-l": -1.6514980792999268, "rewards_train/1-w": 1.6745768785476685, "rewards_train/2-2": 2.019531488418579, "rewards_train/2-w": 1.005958080291748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.326074957847595, "rewards_train/margins_1": 0.5812382698059082, "rewards_train/margins_2": 1.013573408126831, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -167.02073669433594, "logps_train/policy_1_l": -104.77421569824219, "logps_train/policy_1_w": -120.72479248046875, "logps_train/policy_2_2": -137.0094757080078, "logps_train/policy_2_w": -146.11814880371094, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.461988925933838, "rewards_train/1-l": -0.5100393295288086, "rewards_train/1-w": 2.39314603805542, "rewards_train/2-2": 1.8123332262039185, "rewards_train/2-w": 1.6581071615219116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9031853675842285, "rewards_train/margins_1": 0.931157112121582, "rewards_train/margins_2": 0.15422606468200684, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -236.97303771972656, "logps_train/policy_1_l": -232.6812744140625, "logps_train/policy_1_w": -163.74774169921875, "logps_train/policy_2_2": -184.0948486328125, "logps_train/policy_2_w": -213.56878662109375, "logps_train/ref_1_2": -251.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.371446132659912, "rewards_train/1-l": -2.0618767738342285, "rewards_train/1-w": 2.8752262592315674, "rewards_train/2-2": 2.7092654705047607, "rewards_train/2-w": 1.186870813369751, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.937103033065796, "rewards_train/margins_1": 1.5037801265716553, "rewards_train/margins_2": 1.5223946571350098, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -154.71922302246094, "logps_train/policy_1_l": -158.09075927734375, "logps_train/policy_1_w": -86.25804138183594, "logps_train/policy_2_2": -124.90525817871094, "logps_train/policy_2_w": -113.71780395507812, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.0421403646469116, "rewards_train/1-l": -1.3012624979019165, "rewards_train/1-w": 1.5460706949234009, "rewards_train/2-2": 1.598536491394043, "rewards_train/2-w": 1.0641570091247559, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8473331928253174, "rewards_train/margins_1": 0.5039303302764893, "rewards_train/margins_2": 0.5343794822692871, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -162.30960083007812, "logps_train/policy_1_l": -153.03927612304688, "logps_train/policy_1_w": -118.91841125488281, "logps_train/policy_2_2": -137.88558959960938, "logps_train/policy_2_w": -145.41644287109375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.7721652984619141, "rewards_train/1-l": -1.0206749439239502, "rewards_train/1-w": 2.436283588409424, "rewards_train/2-2": 1.2676910161972046, "rewards_train/2-w": 1.5224186182022095, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.456958532333374, "rewards_train/margins_1": 1.6641182899475098, "rewards_train/margins_2": -0.2547276020050049, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -162.8497314453125, "logps_train/policy_1_l": -174.01051330566406, "logps_train/policy_1_w": -111.36402893066406, "logps_train/policy_2_2": -124.0785140991211, "logps_train/policy_2_w": -148.82931518554688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.3513544201850891, "rewards_train/1-l": -1.7619893550872803, "rewards_train/1-w": 2.515550136566162, "rewards_train/2-2": 1.5827734470367432, "rewards_train/2-w": 1.3846467733383179, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.277539491653442, "rewards_train/margins_1": 2.164195716381073, "rewards_train/margins_2": 0.1981266736984253, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -151.30908203125, "logps_train/policy_1_l": -234.3453826904297, "logps_train/policy_1_w": -146.40573120117188, "logps_train/policy_2_2": -125.55374908447266, "logps_train/policy_2_w": -169.8291015625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.8019033670425415, "rewards_train/1-l": -2.067742109298706, "rewards_train/1-w": 2.3352088928222656, "rewards_train/2-2": 1.4625941514968872, "rewards_train/2-w": 1.904590368270874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.402951002120972, "rewards_train/margins_1": 1.5333055257797241, "rewards_train/margins_2": -0.4419962167739868, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -190.87841796875, "logps_train/policy_1_l": -187.2183837890625, "logps_train/policy_1_w": -119.01725769042969, "logps_train/policy_2_2": -159.47927856445312, "logps_train/policy_2_w": -162.74252319335938, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.584031581878662, "rewards_train/1-l": -2.062657356262207, "rewards_train/1-w": 2.4468090534210205, "rewards_train/2-2": 2.3911352157592773, "rewards_train/2-w": 1.2476234436035156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.5094664096832275, "rewards_train/margins_1": 0.8627774715423584, "rewards_train/margins_2": 1.1435117721557617, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -204.35411071777344, "logps_train/policy_1_l": -212.12875366210938, "logps_train/policy_1_w": -131.62774658203125, "logps_train/policy_2_2": -162.9615478515625, "logps_train/policy_2_w": -175.12794494628906, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.7833391427993774, "rewards_train/1-l": -2.2024271488189697, "rewards_train/1-w": 2.0745301246643066, "rewards_train/2-2": 2.0241587162017822, "rewards_train/2-w": 0.9372056722640991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.276957273483276, "rewards_train/margins_1": 1.2911909818649292, "rewards_train/margins_2": 1.086953043937683, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -97.10398864746094, "logps_train/policy_1_l": -101.41610717773438, "logps_train/policy_1_w": -92.9533920288086, "logps_train/policy_2_2": -77.44717407226562, "logps_train/policy_2_w": -116.10832977294922, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.5724135637283325, "rewards_train/1-l": -0.8677829504013062, "rewards_train/1-w": 0.9950902462005615, "rewards_train/2-2": 1.1244233846664429, "rewards_train/2-w": 0.40010422468185425, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8628731966018677, "rewards_train/margins_1": 0.422676682472229, "rewards_train/margins_2": 0.7243191599845886, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -136.97677612304688, "logps_train/policy_1_l": -89.6026611328125, "logps_train/policy_1_w": -77.79156494140625, "logps_train/policy_2_2": -103.95726013183594, "logps_train/policy_2_w": -108.13543701171875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.7179479002952576, "rewards_train/1-l": -0.7430787086486816, "rewards_train/1-w": 1.552875280380249, "rewards_train/2-2": 1.5339608192443848, "rewards_train/2-w": 0.8208311796188354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2959539890289307, "rewards_train/margins_1": 0.8349273800849915, "rewards_train/margins_2": 0.7131296396255493, "step": 125 }, { "epoch": 0.38, "learning_rate": 4.7462628341327e-06, "loss": 0.8974, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -223.91014099121094, "logps_train/policy_1_l": -270.9549560546875, "logps_train/policy_1_w": -178.78843688964844, "logps_train/policy_2_2": -189.91806030273438, "logps_train/policy_2_w": -213.92697143554688, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.6550793647766113, "rewards_train/1-l": -3.0454986095428467, "rewards_train/1-w": 3.164907455444336, "rewards_train/2-2": 2.3097572326660156, "rewards_train/2-w": 1.9760531187057495, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.210406064987183, "rewards_train/margins_1": 1.5098280906677246, "rewards_train/margins_2": 0.3337041139602661, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -190.21487426757812, "logps_train/policy_1_l": -196.93719482421875, "logps_train/policy_1_w": -115.5405044555664, "logps_train/policy_2_2": -163.40016174316406, "logps_train/policy_2_w": -138.72459411621094, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.4660124778747559, "rewards_train/1-l": -1.7083690166473389, "rewards_train/1-w": 1.4225125312805176, "rewards_train/2-2": 2.166233539581299, "rewards_train/2-w": 0.8802742958068848, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1308815479278564, "rewards_train/margins_1": -0.04349994659423828, "rewards_train/margins_2": 1.285959243774414, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -145.78004455566406, "logps_train/policy_1_l": -147.85626220703125, "logps_train/policy_1_w": -132.15740966796875, "logps_train/policy_2_2": -118.32463073730469, "logps_train/policy_2_w": -172.5028839111328, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.7352761030197144, "rewards_train/1-l": -1.3610179424285889, "rewards_train/1-w": 3.7701969146728516, "rewards_train/2-2": 2.2370681762695312, "rewards_train/2-w": 1.988774061203003, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.13121485710144, "rewards_train/margins_1": 2.034920811653137, "rewards_train/margins_2": 0.24829411506652832, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -124.22955322265625, "logps_train/policy_1_l": -106.5700454711914, "logps_train/policy_1_w": -111.79851531982422, "logps_train/policy_2_2": -100.11044311523438, "logps_train/policy_2_w": -145.48260498046875, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.6348570585250854, "rewards_train/1-l": -1.500657081604004, "rewards_train/1-w": 1.5046217441558838, "rewards_train/2-2": 1.5799709558486938, "rewards_train/2-w": 0.38806837797164917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0052788257598877, "rewards_train/margins_1": 0.8697646856307983, "rewards_train/margins_2": 1.1919025778770447, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -153.11965942382812, "logps_train/policy_1_l": -139.72068786621094, "logps_train/policy_1_w": -123.14167785644531, "logps_train/policy_2_2": -124.13220977783203, "logps_train/policy_2_w": -149.77622985839844, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0224096775054932, "rewards_train/1-l": -1.555662989616394, "rewards_train/1-w": 1.8866132497787476, "rewards_train/2-2": 1.9211537837982178, "rewards_train/2-w": 0.8661275506019592, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4422762393951416, "rewards_train/margins_1": 0.8642035722732544, "rewards_train/margins_2": 1.0550262331962585, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -189.86183166503906, "logps_train/policy_1_l": -181.63839721679688, "logps_train/policy_1_w": -162.52670288085938, "logps_train/policy_2_2": -165.24618530273438, "logps_train/policy_2_w": -190.44717407226562, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7399893999099731, "rewards_train/1-l": -1.732589840888977, "rewards_train/1-w": 2.769205093383789, "rewards_train/2-2": 2.46756911277771, "rewards_train/2-w": 1.8849701881408691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.501794934272766, "rewards_train/margins_1": 1.029215693473816, "rewards_train/margins_2": 0.5825989246368408, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -104.952880859375, "logps_train/policy_1_l": -112.54638671875, "logps_train/policy_1_w": -105.42974090576172, "logps_train/policy_2_2": -84.8615951538086, "logps_train/policy_2_w": -123.45892333984375, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.7937740683555603, "rewards_train/1-l": -0.7819831371307373, "rewards_train/1-w": 1.664838433265686, "rewards_train/2-2": 1.0810281038284302, "rewards_train/2-w": 1.26816987991333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4468215703964233, "rewards_train/margins_1": 0.8710643649101257, "rewards_train/margins_2": -0.1871417760848999, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -130.2427215576172, "logps_train/policy_1_l": -132.0812530517578, "logps_train/policy_1_w": -131.24911499023438, "logps_train/policy_2_2": -114.97465515136719, "logps_train/policy_2_w": -151.73374938964844, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.0311965942382812, "rewards_train/1-l": -0.9415227770805359, "rewards_train/1-w": 1.7049716711044312, "rewards_train/2-2": 1.3548777103424072, "rewards_train/2-w": 0.9844374060630798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.646494448184967, "rewards_train/margins_1": 0.6737750768661499, "rewards_train/margins_2": 0.3704403042793274, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -90.51875305175781, "logps_train/policy_1_l": -73.19668579101562, "logps_train/policy_1_w": -66.7522964477539, "logps_train/policy_2_2": -78.02229309082031, "logps_train/policy_2_w": -79.087158203125, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.8153117895126343, "rewards_train/1-l": -0.6682040095329285, "rewards_train/1-w": 1.1629542112350464, "rewards_train/2-2": 0.966715931892395, "rewards_train/2-w": 0.7629640102386475, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8311582207679749, "rewards_train/margins_1": 0.3476424217224121, "rewards_train/margins_2": 0.20375192165374756, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -140.43836975097656, "logps_train/policy_1_l": -139.29335021972656, "logps_train/policy_1_w": -97.79541015625, "logps_train/policy_2_2": -117.24529266357422, "logps_train/policy_2_w": -118.47234344482422, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.4239364564418793, "rewards_train/1-l": -1.499647855758667, "rewards_train/1-w": 1.7997560501098633, "rewards_train/2-2": 1.2891422510147095, "rewards_train/2-w": 1.2486642599105835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.2994039058685303, "rewards_train/margins_1": 1.375819593667984, "rewards_train/margins_2": 0.04047799110412598, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -114.38483428955078, "logps_train/policy_1_l": -114.60773468017578, "logps_train/policy_1_w": -89.51622772216797, "logps_train/policy_2_2": -93.18814849853516, "logps_train/policy_2_w": -118.51432800292969, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.42557954788208, "rewards_train/1-l": -1.2712228298187256, "rewards_train/1-w": 1.6015022993087769, "rewards_train/2-2": 1.9968100786209106, "rewards_train/2-w": 0.8087241053581238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8727251291275024, "rewards_train/margins_1": 0.17592275142669678, "rewards_train/margins_2": 1.1880859732627869, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -197.38253784179688, "logps_train/policy_1_l": -166.37310791015625, "logps_train/policy_1_w": -135.75634765625, "logps_train/policy_2_2": -169.28909301757812, "logps_train/policy_2_w": -167.00320434570312, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.6273709535598755, "rewards_train/1-l": -1.487311601638794, "rewards_train/1-w": 2.97280216217041, "rewards_train/2-2": 2.212496280670166, "rewards_train/2-w": 2.06335186958313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.460113763809204, "rewards_train/margins_1": 1.3454312086105347, "rewards_train/margins_2": 0.14914441108703613, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -122.17143249511719, "logps_train/policy_1_l": -107.57502746582031, "logps_train/policy_1_w": -61.67924499511719, "logps_train/policy_2_2": -103.95082092285156, "logps_train/policy_2_w": -80.65281677246094, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 1.02816903591156, "rewards_train/1-l": -1.0247879028320312, "rewards_train/1-w": 1.1742627620697021, "rewards_train/2-2": 1.5010110139846802, "rewards_train/2-w": 0.8550306558609009, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1990506649017334, "rewards_train/margins_1": 0.1460937261581421, "rewards_train/margins_2": 0.6459803581237793, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -104.89212799072266, "logps_train/policy_1_l": -95.19415283203125, "logps_train/policy_1_w": -81.19440460205078, "logps_train/policy_2_2": -84.23143005371094, "logps_train/policy_2_w": -106.43061828613281, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.104537010192871, "rewards_train/1-l": -0.9432915449142456, "rewards_train/1-w": 1.9692314863204956, "rewards_train/2-2": 1.5393571853637695, "rewards_train/2-w": 0.9647506475448608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.912523031234741, "rewards_train/margins_1": 0.8646944761276245, "rewards_train/margins_2": 0.5746065378189087, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -146.32791137695312, "logps_train/policy_1_l": -164.253173828125, "logps_train/policy_1_w": -142.0102996826172, "logps_train/policy_2_2": -119.22054290771484, "logps_train/policy_2_w": -183.36398315429688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.5656469464302063, "rewards_train/1-l": -2.261840343475342, "rewards_train/1-w": 2.9942831993103027, "rewards_train/2-2": 1.0853679180145264, "rewards_train/2-w": 1.823758840560913, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.2561235427856445, "rewards_train/margins_1": 2.4286362528800964, "rewards_train/margins_2": -0.7383909225463867, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -91.6732406616211, "logps_train/policy_1_l": -93.0150146484375, "logps_train/policy_1_w": -91.50157928466797, "logps_train/policy_2_2": -77.52378845214844, "logps_train/policy_2_w": -108.67587280273438, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.5584573745727539, "rewards_train/1-l": -0.6587282419204712, "rewards_train/1-w": 1.556092619895935, "rewards_train/2-2": 1.0011372566223145, "rewards_train/2-w": 1.1714750528335571, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2148208618164062, "rewards_train/margins_1": 0.9976352453231812, "rewards_train/margins_2": -0.17033779621124268, "step": 127 }, { "epoch": 0.38, "learning_rate": 4.7353118620583464e-06, "loss": 0.8713, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -151.92013549804688, "logps_train/policy_1_l": -115.43016052246094, "logps_train/policy_1_w": -96.45782470703125, "logps_train/policy_2_2": -117.88304138183594, "logps_train/policy_2_w": -124.66624450683594, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.0314244031906128, "rewards_train/1-l": -1.1729469299316406, "rewards_train/1-w": 1.2141780853271484, "rewards_train/2-2": 1.8304468393325806, "rewards_train/2-w": 0.6972430944442749, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.387125015258789, "rewards_train/margins_1": 0.18275368213653564, "rewards_train/margins_2": 1.1332037448883057, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -129.93789672851562, "logps_train/policy_1_l": -144.34255981445312, "logps_train/policy_1_w": -103.43258666992188, "logps_train/policy_2_2": -111.0091552734375, "logps_train/policy_2_w": -123.41417694091797, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.5015215873718262, "rewards_train/1-l": -1.8580844402313232, "rewards_train/1-w": 2.4598660469055176, "rewards_train/2-2": 2.0139286518096924, "rewards_train/2-w": 1.6351451873779297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.317950487136841, "rewards_train/margins_1": 0.9583444595336914, "rewards_train/margins_2": 0.3787834644317627, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -146.3978271484375, "logps_train/policy_1_l": -110.81336212158203, "logps_train/policy_1_w": -150.12921142578125, "logps_train/policy_2_2": -120.05439758300781, "logps_train/policy_2_w": -182.9167938232422, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 2.2133424282073975, "rewards_train/1-l": -1.0537058115005493, "rewards_train/1-w": 1.966766119003296, "rewards_train/2-2": 3.137528419494629, "rewards_train/2-w": 0.7919154167175293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.020471930503845, "rewards_train/margins_1": -0.24657630920410156, "rewards_train/margins_2": 2.3456130027770996, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -70.13636016845703, "logps_train/policy_1_l": -75.05500030517578, "logps_train/policy_1_w": -62.26091003417969, "logps_train/policy_2_2": -64.5663070678711, "logps_train/policy_2_w": -74.65494537353516, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.7590203881263733, "rewards_train/1-l": -0.7711248397827148, "rewards_train/1-w": 1.245784044265747, "rewards_train/2-2": 0.9539161920547485, "rewards_train/2-w": 1.1059894561767578, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.016908884048462, "rewards_train/margins_1": 0.4867636561393738, "rewards_train/margins_2": -0.15207326412200928, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -214.2071075439453, "logps_train/policy_1_l": -177.05908203125, "logps_train/policy_1_w": -138.8864288330078, "logps_train/policy_2_2": -181.33950805664062, "logps_train/policy_2_w": -185.41085815429688, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.2050695419311523, "rewards_train/1-l": -1.1240731477737427, "rewards_train/1-w": 2.471367120742798, "rewards_train/2-2": 3.31487774848938, "rewards_train/2-w": 1.3011008501052856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5954402685165405, "rewards_train/margins_1": 0.2662975788116455, "rewards_train/margins_2": 2.0137768983840942, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -125.16644287109375, "logps_train/policy_1_l": -154.14964294433594, "logps_train/policy_1_w": -82.87185668945312, "logps_train/policy_2_2": -99.62185668945312, "logps_train/policy_2_w": -103.73259735107422, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.7739806175231934, "rewards_train/1-l": -0.8245341181755066, "rewards_train/1-w": 1.3272669315338135, "rewards_train/2-2": 2.3878140449523926, "rewards_train/2-w": 0.7911927700042725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.15180104970932, "rewards_train/margins_1": -0.4467136859893799, "rewards_train/margins_2": 1.5966212749481201, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -87.14002990722656, "logps_train/policy_1_l": -84.62608337402344, "logps_train/policy_1_w": -94.41860961914062, "logps_train/policy_2_2": -67.1398696899414, "logps_train/policy_2_w": -115.65496826171875, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.2213491201400757, "rewards_train/1-l": -0.5585065484046936, "rewards_train/1-w": 1.1466152667999268, "rewards_train/2-2": 1.7252707481384277, "rewards_train/2-w": 0.661261260509491, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7051218152046204, "rewards_train/margins_1": -0.07473385334014893, "rewards_train/margins_2": 1.0640094876289368, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -77.54144287109375, "logps_train/policy_1_l": -53.1983642578125, "logps_train/policy_1_w": -102.01690673828125, "logps_train/policy_2_2": -60.257720947265625, "logps_train/policy_2_w": -126.29974365234375, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -50.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.2036678791046143, "rewards_train/1-l": -0.34071019291877747, "rewards_train/1-w": 1.7295588254928589, "rewards_train/2-2": 1.5429779291152954, "rewards_train/2-w": 0.8481506705284119, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0702690184116364, "rewards_train/margins_1": 0.5258909463882446, "rewards_train/margins_2": 0.6948272585868835, "step": 128 }, { "epoch": 0.39, "logps_train/policy_1_2": -174.9229736328125, "logps_train/policy_1_l": -228.53256225585938, "logps_train/policy_1_w": -113.33661651611328, "logps_train/policy_2_2": -133.27699279785156, "logps_train/policy_2_w": -148.6934051513672, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.632702350616455, "rewards_train/1-l": -1.7643890380859375, "rewards_train/1-w": 1.7210254669189453, "rewards_train/2-2": 2.708238124847412, "rewards_train/2-w": 1.2705031633377075, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.485414505004883, "rewards_train/margins_1": 0.08832311630249023, "rewards_train/margins_2": 1.4377349615097046, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -192.3895263671875, "logps_train/policy_1_l": -233.53378295898438, "logps_train/policy_1_w": -152.732666015625, "logps_train/policy_2_2": -166.75929260253906, "logps_train/policy_2_w": -185.20127868652344, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.902843952178955, "rewards_train/1-l": -1.461970567703247, "rewards_train/1-w": 1.8751702308654785, "rewards_train/2-2": 2.9221177101135254, "rewards_train/2-w": 1.4454973936080933, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3371407985687256, "rewards_train/margins_1": -0.027673721313476562, "rewards_train/margins_2": 1.4766203165054321, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -116.7340087890625, "logps_train/policy_1_l": -70.17068481445312, "logps_train/policy_1_w": -105.75277709960938, "logps_train/policy_2_2": -86.99566650390625, "logps_train/policy_2_w": -122.19575500488281, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.36370909214019775, "rewards_train/1-l": -0.15075945854187012, "rewards_train/1-w": 1.3598787784576416, "rewards_train/2-2": 1.1613709926605225, "rewards_train/2-w": 0.9917525053024292, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.5106382369995117, "rewards_train/margins_1": 0.9961696863174438, "rewards_train/margins_2": 0.16961848735809326, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -95.19418334960938, "logps_train/policy_1_l": -67.6351318359375, "logps_train/policy_1_w": -76.45014190673828, "logps_train/policy_2_2": -85.11581420898438, "logps_train/policy_2_w": -87.9984359741211, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.3415188789367676, "rewards_train/1-l": -0.3383179306983948, "rewards_train/1-w": 1.040922999382019, "rewards_train/2-2": 1.5048247575759888, "rewards_train/2-w": 0.7142190337181091, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3792409300804138, "rewards_train/margins_1": -0.30059587955474854, "rewards_train/margins_2": 0.7906057238578796, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -166.76907348632812, "logps_train/policy_1_l": -236.9141387939453, "logps_train/policy_1_w": -133.9386444091797, "logps_train/policy_2_2": -132.04473876953125, "logps_train/policy_2_w": -168.30938720703125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.3727022409439087, "rewards_train/1-l": -1.1898521184921265, "rewards_train/1-w": 2.3592610359191895, "rewards_train/2-2": 1.8201361894607544, "rewards_train/2-w": 1.5809755325317383, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.549113154411316, "rewards_train/margins_1": 0.9865587949752808, "rewards_train/margins_2": 0.2391606569290161, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -121.39588165283203, "logps_train/policy_1_l": -92.60415649414062, "logps_train/policy_1_w": -85.69741821289062, "logps_train/policy_2_2": -100.53006744384766, "logps_train/policy_2_w": -111.27428436279297, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.31275612115859985, "rewards_train/1-l": -1.2455719709396362, "rewards_train/1-w": 1.1458832025527954, "rewards_train/2-2": 1.0204309225082397, "rewards_train/2-w": 0.5280414819717407, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3914551734924316, "rewards_train/margins_1": 0.8331270813941956, "rewards_train/margins_2": 0.492389440536499, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -182.08676147460938, "logps_train/policy_1_l": -147.963134765625, "logps_train/policy_1_w": -148.1053009033203, "logps_train/policy_2_2": -147.15390014648438, "logps_train/policy_2_w": -186.5418243408203, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.2569482326507568, "rewards_train/1-l": -1.275804877281189, "rewards_train/1-w": 2.8285326957702637, "rewards_train/2-2": 1.8314852714538574, "rewards_train/2-w": 2.0598807334899902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.104337573051453, "rewards_train/margins_1": 1.5715844631195068, "rewards_train/margins_2": -0.2283954620361328, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -140.0107879638672, "logps_train/policy_1_l": -112.08847045898438, "logps_train/policy_1_w": -169.27088928222656, "logps_train/policy_2_2": -119.86593627929688, "logps_train/policy_2_w": -190.97669982910156, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.4700145721435547, "rewards_train/1-l": -0.9022062420845032, "rewards_train/1-w": 2.6072869300842285, "rewards_train/2-2": 1.8805935382843018, "rewards_train/2-w": 1.9023308753967285, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.5094931721687317, "rewards_train/margins_1": 1.1372723579406738, "rewards_train/margins_2": -0.021737337112426758, "step": 129 }, { "epoch": 0.39, "learning_rate": 4.724142727486869e-06, "loss": 0.9021, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -75.95985412597656, "logps_train/policy_1_l": -59.56221389770508, "logps_train/policy_1_w": -68.99090576171875, "logps_train/policy_2_2": -65.05508422851562, "logps_train/policy_2_w": -76.76321411132812, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 1.1805777549743652, "rewards_train/1-l": -0.4149126410484314, "rewards_train/1-w": 0.8938784599304199, "rewards_train/2-2": 1.297812581062317, "rewards_train/2-w": 0.7010228037834167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3087911009788513, "rewards_train/margins_1": -0.2866992950439453, "rewards_train/margins_2": 0.5967897772789001, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -161.41734313964844, "logps_train/policy_1_l": -156.76412963867188, "logps_train/policy_1_w": -98.47628784179688, "logps_train/policy_2_2": -136.5315399169922, "logps_train/policy_2_w": -114.80903625488281, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.8426403999328613, "rewards_train/1-l": -0.6291959881782532, "rewards_train/1-w": 1.001979947090149, "rewards_train/2-2": 1.456611156463623, "rewards_train/2-w": 0.8919486403465271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.631175935268402, "rewards_train/margins_1": 0.1593395471572876, "rewards_train/margins_2": 0.564662516117096, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -149.46371459960938, "logps_train/policy_1_l": -167.8052520751953, "logps_train/policy_1_w": -108.37336730957031, "logps_train/policy_2_2": -124.10047912597656, "logps_train/policy_2_w": -130.81405639648438, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.8630037307739258, "rewards_train/1-l": -1.4102141857147217, "rewards_train/1-w": 1.3478187322616577, "rewards_train/2-2": 2.149326801300049, "rewards_train/2-w": 0.8857818841934204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7580329179763794, "rewards_train/margins_1": -0.5151849985122681, "rewards_train/margins_2": 1.2635449171066284, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -197.53298950195312, "logps_train/policy_1_l": -158.00135803222656, "logps_train/policy_1_w": -136.73956298828125, "logps_train/policy_2_2": -171.27716064453125, "logps_train/policy_2_w": -163.69912719726562, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.4326376914978027, "rewards_train/1-l": -1.1332168579101562, "rewards_train/1-w": 2.7725534439086914, "rewards_train/2-2": 2.4097847938537598, "rewards_train/2-w": 1.8056496381759644, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9057703018188477, "rewards_train/margins_1": 1.3399157524108887, "rewards_train/margins_2": 0.6041351556777954, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -224.3114776611328, "logps_train/policy_1_l": -305.42791748046875, "logps_train/policy_1_w": -190.2078857421875, "logps_train/policy_2_2": -194.7786407470703, "logps_train/policy_2_w": -232.0077667236328, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.9641636610031128, "rewards_train/1-l": -2.392009973526001, "rewards_train/1-w": 3.460460662841797, "rewards_train/2-2": 2.7346363067626953, "rewards_train/2-w": 2.444535255432129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.852470636367798, "rewards_train/margins_1": 1.496297001838684, "rewards_train/margins_2": 0.2901010513305664, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -134.77000427246094, "logps_train/policy_1_l": -187.00308227539062, "logps_train/policy_1_w": -99.84734344482422, "logps_train/policy_2_2": -119.68313598632812, "logps_train/policy_2_w": -115.67666625976562, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.3448749780654907, "rewards_train/1-l": -1.5079251527786255, "rewards_train/1-w": 1.6562814712524414, "rewards_train/2-2": 1.6117643117904663, "rewards_train/2-w": 1.2854585647583008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.164206624031067, "rewards_train/margins_1": 0.3114064931869507, "rewards_train/margins_2": 0.3263057470321655, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -127.17025756835938, "logps_train/policy_1_l": -125.42133331298828, "logps_train/policy_1_w": -98.00030517578125, "logps_train/policy_2_2": -104.6939926147461, "logps_train/policy_2_w": -111.36482238769531, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 1.2466461658477783, "rewards_train/1-l": -1.002680778503418, "rewards_train/1-w": 1.832781434059143, "rewards_train/2-2": 1.5590183734893799, "rewards_train/2-w": 1.6041429042816162, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.835462212562561, "rewards_train/margins_1": 0.5861352682113647, "rewards_train/margins_2": -0.04512453079223633, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -124.46998596191406, "logps_train/policy_1_l": -147.9258270263672, "logps_train/policy_1_w": -110.88365173339844, "logps_train/policy_2_2": -101.43013763427734, "logps_train/policy_2_w": -140.4898681640625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.5264382362365723, "rewards_train/1-l": -1.7904343605041504, "rewards_train/1-w": 1.9686661958694458, "rewards_train/2-2": 2.1694867610931396, "rewards_train/2-w": 1.1150751113891602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.759100556373596, "rewards_train/margins_1": 0.44222795963287354, "rewards_train/margins_2": 1.0544116497039795, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -243.17152404785156, "logps_train/policy_1_l": -181.5786590576172, "logps_train/policy_1_w": -137.39453125, "logps_train/policy_2_2": -205.2943115234375, "logps_train/policy_2_w": -164.68988037109375, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -237.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 2.02034854888916, "rewards_train/1-l": -1.66118586063385, "rewards_train/1-w": 2.6136717796325684, "rewards_train/2-2": 3.1713485717773438, "rewards_train/2-w": 1.7310121059417725, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.2748576402664185, "rewards_train/margins_1": 0.5933232307434082, "rewards_train/margins_2": 1.4403364658355713, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -141.0679931640625, "logps_train/policy_1_l": -115.82882690429688, "logps_train/policy_1_w": -86.4051284790039, "logps_train/policy_2_2": -112.99468231201172, "logps_train/policy_2_w": -107.19544982910156, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.5916386246681213, "rewards_train/1-l": -0.6725309491157532, "rewards_train/1-w": 1.1563620567321777, "rewards_train/2-2": 1.5239695310592651, "rewards_train/2-w": 0.9273301362991333, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.828893005847931, "rewards_train/margins_1": 0.5647234320640564, "rewards_train/margins_2": 0.5966393947601318, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -199.45736694335938, "logps_train/policy_1_l": -155.12733459472656, "logps_train/policy_1_w": -135.21205139160156, "logps_train/policy_2_2": -157.221435546875, "logps_train/policy_2_w": -180.9176483154297, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.0335588455200195, "rewards_train/1-l": -1.315077304840088, "rewards_train/1-w": 2.3998889923095703, "rewards_train/2-2": 3.1489500999450684, "rewards_train/2-w": 1.2301100492477417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.714966297149658, "rewards_train/margins_1": 0.3663301467895508, "rewards_train/margins_2": 1.9188400506973267, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -250.705322265625, "logps_train/policy_1_l": -250.72406005859375, "logps_train/policy_1_w": -227.7953643798828, "logps_train/policy_2_2": -206.91514587402344, "logps_train/policy_2_w": -288.27435302734375, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -239.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 2.252124309539795, "rewards_train/1-l": -2.7996506690979004, "rewards_train/1-w": 4.033549785614014, "rewards_train/2-2": 3.221766948699951, "rewards_train/2-w": 2.073639392852783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.833200454711914, "rewards_train/margins_1": 1.7814254760742188, "rewards_train/margins_2": 1.148127555847168, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -250.7613067626953, "logps_train/policy_1_l": -220.7144775390625, "logps_train/policy_1_w": -178.2466278076172, "logps_train/policy_2_2": -214.21783447265625, "logps_train/policy_2_w": -215.4309539794922, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -248.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.2676188945770264, "rewards_train/1-l": -1.4620732069015503, "rewards_train/1-w": 2.2659621238708496, "rewards_train/2-2": 3.353217601776123, "rewards_train/2-w": 1.3881549835205078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7280353307724, "rewards_train/margins_1": -0.0016567707061767578, "rewards_train/margins_2": 1.9650626182556152, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -123.11564636230469, "logps_train/policy_1_l": -145.28269958496094, "logps_train/policy_1_w": -136.89645385742188, "logps_train/policy_2_2": -88.4571304321289, "logps_train/policy_2_w": -187.91006469726562, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 0.9079667329788208, "rewards_train/1-l": -1.6928212642669678, "rewards_train/1-w": 2.2681667804718018, "rewards_train/2-2": 1.4433494806289673, "rewards_train/2-w": 0.7011817693710327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9609880447387695, "rewards_train/margins_1": 1.360200047492981, "rewards_train/margins_2": 0.7421677112579346, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -145.8645477294922, "logps_train/policy_1_l": -126.15657043457031, "logps_train/policy_1_w": -90.93632507324219, "logps_train/policy_2_2": -119.01998138427734, "logps_train/policy_2_w": -107.6724624633789, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 0.40358275175094604, "rewards_train/1-l": -0.7124832272529602, "rewards_train/1-w": 1.3997271060943604, "rewards_train/2-2": 1.147220492362976, "rewards_train/2-w": 0.7991600632667542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1122103333473206, "rewards_train/margins_1": 0.9961443543434143, "rewards_train/margins_2": 0.3480604290962219, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -152.35952758789062, "logps_train/policy_1_l": -141.1923065185547, "logps_train/policy_1_w": -128.43954467773438, "logps_train/policy_2_2": -127.78517150878906, "logps_train/policy_2_w": -159.2666473388672, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.7171721458435059, "rewards_train/1-l": -1.2856366634368896, "rewards_train/1-w": 2.0052642822265625, "rewards_train/2-2": 2.3121085166931152, "rewards_train/2-w": 1.1608352661132812, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.290900945663452, "rewards_train/margins_1": 0.28809213638305664, "rewards_train/margins_2": 1.151273250579834, "step": 131 }, { "epoch": 0.4, "learning_rate": 4.71275652050611e-06, "loss": 0.8209, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -195.72276306152344, "logps_train/policy_1_l": -204.51356506347656, "logps_train/policy_1_w": -147.93641662597656, "logps_train/policy_2_2": -160.20150756835938, "logps_train/policy_2_w": -201.9420928955078, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.8859267234802246, "rewards_train/1-l": -2.2745978832244873, "rewards_train/1-w": 3.1016697883605957, "rewards_train/2-2": 2.670865535736084, "rewards_train/2-w": 1.804227590560913, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.376267671585083, "rewards_train/margins_1": 1.215743064880371, "rewards_train/margins_2": 0.8666379451751709, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -172.07333374023438, "logps_train/policy_1_l": -123.43489074707031, "logps_train/policy_1_w": -142.4799041748047, "logps_train/policy_2_2": -133.83139038085938, "logps_train/policy_2_w": -179.6149139404297, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.569227695465088, "rewards_train/1-l": -0.5339194536209106, "rewards_train/1-w": 3.2504477500915527, "rewards_train/2-2": 2.465299606323242, "rewards_train/2-w": 2.1541335582733154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7843672037124634, "rewards_train/margins_1": 1.6812200546264648, "rewards_train/margins_2": 0.31116604804992676, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -108.68647003173828, "logps_train/policy_1_l": -177.56105041503906, "logps_train/policy_1_w": -103.95819091796875, "logps_train/policy_2_2": -92.98079681396484, "logps_train/policy_2_w": -122.32008361816406, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.4305716753005981, "rewards_train/1-l": -1.3131358623504639, "rewards_train/1-w": 1.5479309558868408, "rewards_train/2-2": 1.4733073711395264, "rewards_train/2-w": 1.0445539951324463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8610668182373047, "rewards_train/margins_1": 0.11735928058624268, "rewards_train/margins_2": 0.4287533760070801, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -234.41964721679688, "logps_train/policy_1_l": -225.6343994140625, "logps_train/policy_1_w": -130.52310180664062, "logps_train/policy_2_2": -182.81271362304688, "logps_train/policy_2_w": -167.22933959960938, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.7509064674377441, "rewards_train/1-l": -1.6962523460388184, "rewards_train/1-w": 1.984311580657959, "rewards_train/2-2": 2.754129409790039, "rewards_train/2-w": 1.0718425512313843, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6805639266967773, "rewards_train/margins_1": 0.23340511322021484, "rewards_train/margins_2": 1.6822868585586548, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -138.4698944091797, "logps_train/policy_1_l": -158.87673950195312, "logps_train/policy_1_w": -108.9770278930664, "logps_train/policy_2_2": -111.96475219726562, "logps_train/policy_2_w": -152.11521911621094, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.9545732736587524, "rewards_train/1-l": -1.6786408424377441, "rewards_train/1-w": 2.247023820877075, "rewards_train/2-2": 2.6558687686920166, "rewards_train/2-w": 0.9916026592254639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9256646633148193, "rewards_train/margins_1": 0.29245054721832275, "rewards_train/margins_2": 1.6642661094665527, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -251.76336669921875, "logps_train/policy_1_l": -277.77978515625, "logps_train/policy_1_w": -151.8551025390625, "logps_train/policy_2_2": -210.9091339111328, "logps_train/policy_2_w": -202.35501098632812, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.3924132585525513, "rewards_train/1-l": -1.9029767513275146, "rewards_train/1-w": 3.4269890785217285, "rewards_train/2-2": 2.4403367042541504, "rewards_train/2-w": 2.289498805999756, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.329965829849243, "rewards_train/margins_1": 2.0345758199691772, "rewards_train/margins_2": 0.15083789825439453, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -134.7413787841797, "logps_train/policy_1_l": -194.5221405029297, "logps_train/policy_1_w": -142.60150146484375, "logps_train/policy_2_2": -111.82508850097656, "logps_train/policy_2_w": -173.0243377685547, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.2914869785308838, "rewards_train/1-l": -1.5459632873535156, "rewards_train/1-w": 2.6961002349853516, "rewards_train/2-2": 1.7081162929534912, "rewards_train/2-w": 1.7913159132003784, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.242063522338867, "rewards_train/margins_1": 1.4046132564544678, "rewards_train/margins_2": -0.08319962024688721, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -135.1891632080078, "logps_train/policy_1_l": -188.08648681640625, "logps_train/policy_1_w": -136.72738647460938, "logps_train/policy_2_2": -114.7955322265625, "logps_train/policy_2_w": -162.78396606445312, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.7088178396224976, "rewards_train/1-l": -1.4749572277069092, "rewards_train/1-w": 1.5741350650787354, "rewards_train/2-2": 2.0505242347717285, "rewards_train/2-w": 1.1419172286987305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0490922927856445, "rewards_train/margins_1": -0.1346827745437622, "rewards_train/margins_2": 0.908607006072998, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -152.4993896484375, "logps_train/policy_1_l": -164.17518615722656, "logps_train/policy_1_w": -126.06834411621094, "logps_train/policy_2_2": -116.46998596191406, "logps_train/policy_2_w": -161.46429443359375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.8938117027282715, "rewards_train/1-l": -1.4445693492889404, "rewards_train/1-w": 1.860352635383606, "rewards_train/2-2": 1.7998759746551514, "rewards_train/2-w": 0.6738821268081665, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3049219846725464, "rewards_train/margins_1": 0.9665409326553345, "rewards_train/margins_2": 1.1259938478469849, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -178.66932678222656, "logps_train/policy_1_l": -99.98220825195312, "logps_train/policy_1_w": -111.49268341064453, "logps_train/policy_2_2": -150.57022094726562, "logps_train/policy_2_w": -137.80267333984375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.7596309185028076, "rewards_train/1-l": -0.7603297829627991, "rewards_train/1-w": 1.9335441589355469, "rewards_train/2-2": 2.605672836303711, "rewards_train/2-w": 1.5291080474853516, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.693873941898346, "rewards_train/margins_1": 0.17391324043273926, "rewards_train/margins_2": 1.0765647888183594, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -178.42910766601562, "logps_train/policy_1_l": -245.19708251953125, "logps_train/policy_1_w": -123.00871276855469, "logps_train/policy_2_2": -155.00335693359375, "logps_train/policy_2_w": -154.88052368164062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -243.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.5449800491333008, "rewards_train/1-l": -0.23357492685317993, "rewards_train/1-w": 2.0788161754608154, "rewards_train/2-2": 2.279350757598877, "rewards_train/2-w": 1.4291349649429321, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3123911023139954, "rewards_train/margins_1": 0.5338361263275146, "rewards_train/margins_2": 0.8502157926559448, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -162.95718383789062, "logps_train/policy_1_l": -189.1929931640625, "logps_train/policy_1_w": -122.78936767578125, "logps_train/policy_2_2": -138.8517303466797, "logps_train/policy_2_w": -153.16876220703125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.548032283782959, "rewards_train/1-l": -1.5646123886108398, "rewards_train/1-w": 2.2866878509521484, "rewards_train/2-2": 2.2960774898529053, "rewards_train/2-w": 1.873748540878296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8513002395629883, "rewards_train/margins_1": 0.7386555671691895, "rewards_train/margins_2": 0.4223289489746094, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -182.4448699951172, "logps_train/policy_1_l": -162.575439453125, "logps_train/policy_1_w": -150.76121520996094, "logps_train/policy_2_2": -145.71446228027344, "logps_train/policy_2_w": -177.1030731201172, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.3492627143859863, "rewards_train/1-l": -2.4061756134033203, "rewards_train/1-w": 2.208253860473633, "rewards_train/2-2": 2.247107744216919, "rewards_train/2-w": 1.3553178310394287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.614429473876953, "rewards_train/margins_1": 0.8589911460876465, "rewards_train/margins_2": 0.8917899131774902, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -108.14572143554688, "logps_train/policy_1_l": -89.79367065429688, "logps_train/policy_1_w": -108.10462951660156, "logps_train/policy_2_2": -89.93670654296875, "logps_train/policy_2_w": -134.220703125, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.5401160717010498, "rewards_train/1-l": -0.8968276977539062, "rewards_train/1-w": 2.6882872581481934, "rewards_train/2-2": 1.89890718460083, "rewards_train/2-w": 1.8817298412322998, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5851149559020996, "rewards_train/margins_1": 1.1481711864471436, "rewards_train/margins_2": 0.017177343368530273, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -174.603515625, "logps_train/policy_1_l": -193.32321166992188, "logps_train/policy_1_w": -154.13995361328125, "logps_train/policy_2_2": -147.25462341308594, "logps_train/policy_2_w": -180.27496337890625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.3505859375, "rewards_train/1-l": -1.8881808519363403, "rewards_train/1-w": 1.7887394428253174, "rewards_train/2-2": 2.1395769119262695, "rewards_train/2-w": 0.9580515623092651, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6769202947616577, "rewards_train/margins_1": 0.4381535053253174, "rewards_train/margins_2": 1.1815253496170044, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -162.99481201171875, "logps_train/policy_1_l": -181.54153442382812, "logps_train/policy_1_w": -111.18424987792969, "logps_train/policy_2_2": -129.0675506591797, "logps_train/policy_2_w": -144.63360595703125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.12864351272583, "rewards_train/1-l": -1.6150901317596436, "rewards_train/1-w": 2.5511069297790527, "rewards_train/2-2": 2.632308006286621, "rewards_train/2-w": 2.121796131134033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.166197061538696, "rewards_train/margins_1": 0.42246341705322266, "rewards_train/margins_2": 0.5105118751525879, "step": 133 }, { "epoch": 0.4, "learning_rate": 4.7011543523898e-06, "loss": 0.8001, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -216.50927734375, "logps_train/policy_1_l": -207.80072021484375, "logps_train/policy_1_w": -188.99954223632812, "logps_train/policy_2_2": -172.37167358398438, "logps_train/policy_2_w": -228.2800750732422, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.4803223609924316, "rewards_train/1-l": -1.0863208770751953, "rewards_train/1-w": 2.9000463485717773, "rewards_train/2-2": 3.0190820693969727, "rewards_train/2-w": 1.8594921827316284, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9863672256469727, "rewards_train/margins_1": 1.4197239875793457, "rewards_train/margins_2": 1.1595898866653442, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -167.32968139648438, "logps_train/policy_1_l": -201.81285095214844, "logps_train/policy_1_w": -149.43545532226562, "logps_train/policy_2_2": -133.53677368164062, "logps_train/policy_2_w": -186.7111358642578, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.4670320749282837, "rewards_train/1-l": -1.9660507440567017, "rewards_train/1-w": 2.4322357177734375, "rewards_train/2-2": 2.4525723457336426, "rewards_train/2-w": 1.3675577640533447, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.398286461830139, "rewards_train/margins_1": 0.9652036428451538, "rewards_train/margins_2": 1.0850145816802979, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -147.03054809570312, "logps_train/policy_1_l": -174.64248657226562, "logps_train/policy_1_w": -150.85386657714844, "logps_train/policy_2_2": -112.89836120605469, "logps_train/policy_2_w": -191.37644958496094, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.7735071182250977, "rewards_train/1-l": -1.5238196849822998, "rewards_train/1-w": 3.1005516052246094, "rewards_train/2-2": 1.25430428981781, "rewards_train/2-w": 1.8873549699783325, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.624371290206909, "rewards_train/margins_1": 2.3270444869995117, "rewards_train/margins_2": -0.6330506801605225, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -122.7352523803711, "logps_train/policy_1_l": -129.79910278320312, "logps_train/policy_1_w": -154.24871826171875, "logps_train/policy_2_2": -105.53410339355469, "logps_train/policy_2_w": -181.47579956054688, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.1889746189117432, "rewards_train/1-l": -1.4896764755249023, "rewards_train/1-w": 2.420832633972168, "rewards_train/2-2": 1.535651445388794, "rewards_train/2-w": 1.4731237888336182, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9105091094970703, "rewards_train/margins_1": 1.2318580150604248, "rewards_train/margins_2": 0.06252765655517578, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -175.34100341796875, "logps_train/policy_1_l": -116.79350280761719, "logps_train/policy_1_w": -119.28474426269531, "logps_train/policy_2_2": -143.81442260742188, "logps_train/policy_2_w": -154.97235107421875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.190901279449463, "rewards_train/1-l": -0.7711470723152161, "rewards_train/1-w": 1.8465263843536377, "rewards_train/2-2": 2.681057929992676, "rewards_train/2-w": 1.0683903694152832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6176734566688538, "rewards_train/margins_1": -0.3443748950958252, "rewards_train/margins_2": 1.6126675605773926, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -153.13278198242188, "logps_train/policy_1_l": -177.74075317382812, "logps_train/policy_1_w": -124.0940933227539, "logps_train/policy_2_2": -123.6954116821289, "logps_train/policy_2_w": -158.112060546875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.1247092485427856, "rewards_train/1-l": -1.3330612182617188, "rewards_train/1-w": 2.06402850151062, "rewards_train/2-2": 1.683485984802246, "rewards_train/2-w": 1.1130129098892212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.397089719772339, "rewards_train/margins_1": 0.9393192529678345, "rewards_train/margins_2": 0.5704730749130249, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -110.01579284667969, "logps_train/policy_1_l": -155.03012084960938, "logps_train/policy_1_w": -159.23968505859375, "logps_train/policy_2_2": -91.69769287109375, "logps_train/policy_2_w": -186.02322387695312, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.7633618116378784, "rewards_train/1-l": -1.296714186668396, "rewards_train/1-w": 1.5463438034057617, "rewards_train/2-2": 1.0515682697296143, "rewards_train/2-w": 0.8461161851882935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8430579900741577, "rewards_train/margins_1": 0.7829819917678833, "rewards_train/margins_2": 0.2054520845413208, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -182.2700653076172, "logps_train/policy_1_l": -261.00537109375, "logps_train/policy_1_w": -183.250244140625, "logps_train/policy_2_2": -160.14468383789062, "logps_train/policy_2_w": -218.10177612304688, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.4229936599731445, "rewards_train/1-l": -2.6737799644470215, "rewards_train/1-w": 3.3046631813049316, "rewards_train/2-2": 3.026157855987549, "rewards_train/2-w": 2.051542282104492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.978443145751953, "rewards_train/margins_1": 0.8816695213317871, "rewards_train/margins_2": 0.9746155738830566, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -139.8367462158203, "logps_train/policy_1_l": -103.82826232910156, "logps_train/policy_1_w": -137.36041259765625, "logps_train/policy_2_2": -121.51826477050781, "logps_train/policy_2_w": -157.69992065429688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.1757001876831055, "rewards_train/1-l": -0.6437634229660034, "rewards_train/1-w": 2.022552967071533, "rewards_train/2-2": 1.2528609037399292, "rewards_train/2-w": 1.2866499423980713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6663163900375366, "rewards_train/margins_1": 0.8468527793884277, "rewards_train/margins_2": -0.03378903865814209, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -87.87629699707031, "logps_train/policy_1_l": -151.28997802734375, "logps_train/policy_1_w": -77.84391784667969, "logps_train/policy_2_2": -71.45722961425781, "logps_train/policy_2_w": -92.72029876708984, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": 1.1592458486557007, "rewards_train/1-l": -2.6448190212249756, "rewards_train/1-w": 1.3515461683273315, "rewards_train/2-2": 1.5902149677276611, "rewards_train/2-w": 0.87250155210495, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.996365189552307, "rewards_train/margins_1": 0.19230031967163086, "rewards_train/margins_2": 0.7177134156227112, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -157.6899871826172, "logps_train/policy_1_l": -171.26681518554688, "logps_train/policy_1_w": -122.89600372314453, "logps_train/policy_2_2": -132.3829345703125, "logps_train/policy_2_w": -143.9131317138672, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4013136625289917, "rewards_train/1-l": -1.5450408458709717, "rewards_train/1-w": 1.814306378364563, "rewards_train/2-2": 2.080456256866455, "rewards_train/2-w": 1.5469683408737183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3593472242355347, "rewards_train/margins_1": 0.4129927158355713, "rewards_train/margins_2": 0.5334879159927368, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -151.4544677734375, "logps_train/policy_1_l": -159.723876953125, "logps_train/policy_1_w": -149.48394775390625, "logps_train/policy_2_2": -111.59420776367188, "logps_train/policy_2_w": -181.29498291015625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.840491533279419, "rewards_train/1-l": -1.4458255767822266, "rewards_train/1-w": 1.6391046047210693, "rewards_train/2-2": 1.7733923196792603, "rewards_train/2-w": 1.1314380168914795, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.084930181503296, "rewards_train/margins_1": 0.7986130714416504, "rewards_train/margins_2": 0.6419543027877808, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -191.41607666015625, "logps_train/policy_1_l": -197.500732421875, "logps_train/policy_1_w": -147.7887420654297, "logps_train/policy_2_2": -160.14047241210938, "logps_train/policy_2_w": -187.96334838867188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.9115184545516968, "rewards_train/1-l": -1.6215571165084839, "rewards_train/1-w": 2.198519229888916, "rewards_train/2-2": 2.8406403064727783, "rewards_train/2-w": 1.068018913269043, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8200763463974, "rewards_train/margins_1": 0.28700077533721924, "rewards_train/margins_2": 1.7726213932037354, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -192.70150756835938, "logps_train/policy_1_l": -139.46551513671875, "logps_train/policy_1_w": -130.91653442382812, "logps_train/policy_2_2": -158.3838653564453, "logps_train/policy_2_w": -158.53274536132812, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.6474268436431885, "rewards_train/1-l": -1.3047065734863281, "rewards_train/1-w": 2.5989716053009033, "rewards_train/2-2": 2.773136615753174, "rewards_train/2-w": 1.7404749393463135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9036781787872314, "rewards_train/margins_1": 0.9515447616577148, "rewards_train/margins_2": 1.0326616764068604, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -144.80093383789062, "logps_train/policy_1_l": -156.57974243164062, "logps_train/policy_1_w": -116.18275451660156, "logps_train/policy_2_2": -114.22380065917969, "logps_train/policy_2_w": -145.30044555664062, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.6652191877365112, "rewards_train/1-l": -1.5935214757919312, "rewards_train/1-w": 2.348911762237549, "rewards_train/2-2": 1.3530105352401733, "rewards_train/2-w": 1.4152686595916748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.94243323802948, "rewards_train/margins_1": 1.6836925745010376, "rewards_train/margins_2": -0.062258124351501465, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -142.60549926757812, "logps_train/policy_1_l": -141.22872924804688, "logps_train/policy_1_w": -99.16771697998047, "logps_train/policy_2_2": -123.032470703125, "logps_train/policy_2_w": -111.39054107666016, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.4285119771957397, "rewards_train/1-l": -1.4338092803955078, "rewards_train/1-w": 1.5738531351089478, "rewards_train/2-2": 2.1897215843200684, "rewards_train/2-w": 1.504695177078247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0076624155044556, "rewards_train/margins_1": 0.145341157913208, "rewards_train/margins_2": 0.6850264072418213, "step": 135 }, { "epoch": 0.41, "learning_rate": 4.689337355489092e-06, "loss": 0.7656, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -124.95291900634766, "logps_train/policy_1_l": -116.27449798583984, "logps_train/policy_1_w": -126.57049560546875, "logps_train/policy_2_2": -105.347900390625, "logps_train/policy_2_w": -155.03709411621094, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.0906457901000977, "rewards_train/1-l": -0.5329183340072632, "rewards_train/1-w": 1.6835752725601196, "rewards_train/2-2": 1.3863036632537842, "rewards_train/2-w": 0.9728522300720215, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.216493606567383, "rewards_train/margins_1": 0.592929482460022, "rewards_train/margins_2": 0.4134514331817627, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -138.9091796875, "logps_train/policy_1_l": -189.11180114746094, "logps_train/policy_1_w": -148.95196533203125, "logps_train/policy_2_2": -109.00419616699219, "logps_train/policy_2_w": -195.6371612548828, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.186035394668579, "rewards_train/1-l": -1.2103989124298096, "rewards_train/1-w": 1.7360539436340332, "rewards_train/2-2": 1.6747753620147705, "rewards_train/2-w": 0.7706587314605713, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9464528560638428, "rewards_train/margins_1": 0.5500185489654541, "rewards_train/margins_2": 0.9041166305541992, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -134.94119262695312, "logps_train/policy_1_l": -103.80302429199219, "logps_train/policy_1_w": -82.37948608398438, "logps_train/policy_2_2": -110.12400817871094, "logps_train/policy_2_w": -95.12387084960938, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.6996304988861084, "rewards_train/1-l": -0.6443645358085632, "rewards_train/1-w": 1.0058008432388306, "rewards_train/2-2": 2.1813483238220215, "rewards_train/2-w": 0.6715972423553467, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.6501653790473938, "rewards_train/margins_1": -0.6938296556472778, "rewards_train/margins_2": 1.5097510814666748, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -138.65155029296875, "logps_train/policy_1_l": -84.9120864868164, "logps_train/policy_1_w": -55.32450485229492, "logps_train/policy_2_2": -101.68460083007812, "logps_train/policy_2_w": -81.39373779296875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -60.75, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -81.5, "rewards_train/1-2": 1.2340627908706665, "rewards_train/1-l": -0.452146053314209, "rewards_train/1-w": 0.5456742644309998, "rewards_train/2-2": 2.076071262359619, "rewards_train/2-w": 0.02703261375427246, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 0.9978203177452087, "rewards_train/margins_1": -0.6883885264396667, "rewards_train/margins_2": 2.0490386486053467, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -192.67703247070312, "logps_train/policy_1_l": -151.68197631835938, "logps_train/policy_1_w": -161.33950805664062, "logps_train/policy_2_2": -164.24508666992188, "logps_train/policy_2_w": -186.38320922851562, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.7416720390319824, "rewards_train/1-l": -0.6244487166404724, "rewards_train/1-w": 1.5910497903823853, "rewards_train/2-2": 2.3754913806915283, "rewards_train/2-w": 1.1616783142089844, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2154985070228577, "rewards_train/margins_1": -0.15062224864959717, "rewards_train/margins_2": 1.213813066482544, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -176.2660675048828, "logps_train/policy_1_l": -187.06179809570312, "logps_train/policy_1_w": -116.7213134765625, "logps_train/policy_2_2": -153.2435760498047, "logps_train/policy_2_w": -143.81927490234375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.746830940246582, "rewards_train/1-l": -1.433914303779602, "rewards_train/1-w": 2.4739620685577393, "rewards_train/2-2": 2.331892490386963, "rewards_train/2-w": 1.9508858919143677, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9078763723373413, "rewards_train/margins_1": 0.7271311283111572, "rewards_train/margins_2": 0.3810065984725952, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -136.26840209960938, "logps_train/policy_1_l": -169.88525390625, "logps_train/policy_1_w": -156.81088256835938, "logps_train/policy_2_2": -107.19485473632812, "logps_train/policy_2_w": -190.32713317871094, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.0388824939727783, "rewards_train/1-l": -1.8320319652557373, "rewards_train/1-w": 2.58453631401062, "rewards_train/2-2": 1.65297532081604, "rewards_train/2-w": 1.4729511737823486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.416568279266357, "rewards_train/margins_1": 1.5456538200378418, "rewards_train/margins_2": 0.1800241470336914, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -97.79122161865234, "logps_train/policy_1_l": -90.69528198242188, "logps_train/policy_1_w": -87.75926208496094, "logps_train/policy_2_2": -80.131103515625, "logps_train/policy_2_w": -112.33422088623047, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.7771281599998474, "rewards_train/1-l": -0.9988254904747009, "rewards_train/1-w": 0.6975115537643433, "rewards_train/2-2": 1.1689209938049316, "rewards_train/2-w": 0.6404059529304504, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.6963370442390442, "rewards_train/margins_1": -0.07961660623550415, "rewards_train/margins_2": 0.5285150408744812, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -143.9521484375, "logps_train/policy_1_l": -156.64105224609375, "logps_train/policy_1_w": -125.29754638671875, "logps_train/policy_2_2": -125.06863403320312, "logps_train/policy_2_w": -141.15599060058594, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.170409083366394, "rewards_train/1-l": -0.5630303025245667, "rewards_train/1-w": 1.646808385848999, "rewards_train/2-2": 1.537668228149414, "rewards_train/2-w": 1.1094013452529907, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2098386883735657, "rewards_train/margins_1": 0.476399302482605, "rewards_train/margins_2": 0.42826688289642334, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -119.07246398925781, "logps_train/policy_1_l": -104.89064025878906, "logps_train/policy_1_w": -103.86921691894531, "logps_train/policy_2_2": -98.38114929199219, "logps_train/policy_2_w": -118.16452026367188, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.9075970649719238, "rewards_train/1-l": -0.7944349050521851, "rewards_train/1-w": 1.7601492404937744, "rewards_train/2-2": 2.3423538208007812, "rewards_train/2-w": 1.1597201824188232, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5545841455459595, "rewards_train/margins_1": -0.14744782447814941, "rewards_train/margins_2": 1.182633638381958, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -109.51762390136719, "logps_train/policy_1_l": -154.43267822265625, "logps_train/policy_1_w": -77.2549057006836, "logps_train/policy_2_2": -87.0098876953125, "logps_train/policy_2_w": -109.34848022460938, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 0.9677690267562866, "rewards_train/1-l": -1.0454156398773193, "rewards_train/1-w": 1.2995095252990723, "rewards_train/2-2": 1.295104742050171, "rewards_train/2-w": 0.7870269417762756, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3449251651763916, "rewards_train/margins_1": 0.33174049854278564, "rewards_train/margins_2": 0.5080778002738953, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -118.58209991455078, "logps_train/policy_1_l": -89.9771728515625, "logps_train/policy_1_w": -79.56547546386719, "logps_train/policy_2_2": -102.35060119628906, "logps_train/policy_2_w": -90.27072143554688, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.126945972442627, "rewards_train/1-l": -0.9955692291259766, "rewards_train/1-w": 0.990327000617981, "rewards_train/2-2": 1.3192366361618042, "rewards_train/2-w": 0.651443362236023, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9858962297439575, "rewards_train/margins_1": -0.136618971824646, "rewards_train/margins_2": 0.6677932739257812, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -138.87220764160156, "logps_train/policy_1_l": -150.25408935546875, "logps_train/policy_1_w": -114.90995025634766, "logps_train/policy_2_2": -112.51997375488281, "logps_train/policy_2_w": -155.9097137451172, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.8541840314865112, "rewards_train/1-l": -0.7715020179748535, "rewards_train/1-w": 1.783613681793213, "rewards_train/2-2": 1.0628468990325928, "rewards_train/2-w": 1.3062949180603027, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.5551156997680664, "rewards_train/margins_1": 0.9294296503067017, "rewards_train/margins_2": -0.24344801902770996, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -182.98162841796875, "logps_train/policy_1_l": -168.49778747558594, "logps_train/policy_1_w": -149.730224609375, "logps_train/policy_2_2": -153.26840209960938, "logps_train/policy_2_w": -172.84605407714844, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.2565244436264038, "rewards_train/1-l": -1.1810286045074463, "rewards_train/1-w": 2.3847904205322266, "rewards_train/2-2": 2.1450347900390625, "rewards_train/2-w": 1.7091453075408936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.565819025039673, "rewards_train/margins_1": 1.1282659769058228, "rewards_train/margins_2": 0.43588948249816895, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -151.892578125, "logps_train/policy_1_l": -114.33662414550781, "logps_train/policy_1_w": -123.31668853759766, "logps_train/policy_2_2": -122.5103759765625, "logps_train/policy_2_w": -154.0436553955078, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.491992712020874, "rewards_train/1-l": -0.7219924926757812, "rewards_train/1-w": 1.6121790409088135, "rewards_train/2-2": 2.1723990440368652, "rewards_train/2-w": 0.8666306138038635, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3341715335845947, "rewards_train/margins_1": 0.12018632888793945, "rewards_train/margins_2": 1.3057684302330017, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -258.51080322265625, "logps_train/policy_1_l": -266.9127197265625, "logps_train/policy_1_w": -240.37538146972656, "logps_train/policy_2_2": -219.6066436767578, "logps_train/policy_2_w": -277.25811767578125, "logps_train/ref_1_2": -278.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -280.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 2.036419153213501, "rewards_train/1-l": -2.635021209716797, "rewards_train/1-w": 3.962461471557617, "rewards_train/2-2": 3.0080859661102295, "rewards_train/2-w": 2.7054357528686523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.597482681274414, "rewards_train/margins_1": 1.9260423183441162, "rewards_train/margins_2": 0.30265021324157715, "step": 137 }, { "epoch": 0.41, "learning_rate": 4.677306683122054e-06, "loss": 0.9064, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -239.0284423828125, "logps_train/policy_1_l": -236.06781005859375, "logps_train/policy_1_w": -150.88125610351562, "logps_train/policy_2_2": -196.1281280517578, "logps_train/policy_2_w": -183.56710815429688, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.406139850616455, "rewards_train/1-l": -2.183147430419922, "rewards_train/1-w": 1.641562581062317, "rewards_train/2-2": 2.6762495040893555, "rewards_train/2-w": 0.7636005878448486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8247100114822388, "rewards_train/margins_1": 0.23542273044586182, "rewards_train/margins_2": 1.9126489162445068, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -160.9010467529297, "logps_train/policy_1_l": -225.61175537109375, "logps_train/policy_1_w": -189.53875732421875, "logps_train/policy_2_2": -141.25238037109375, "logps_train/policy_2_w": -219.44459533691406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.741145133972168, "rewards_train/1-l": -2.2822694778442383, "rewards_train/1-w": 2.2211251258850098, "rewards_train/2-2": 2.262261390686035, "rewards_train/2-w": 1.3242902755737305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.503394603729248, "rewards_train/margins_1": 0.4799799919128418, "rewards_train/margins_2": 0.9379711151123047, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -127.6006851196289, "logps_train/policy_1_l": -97.13851928710938, "logps_train/policy_1_w": -134.20672607421875, "logps_train/policy_2_2": -101.76041412353516, "logps_train/policy_2_w": -167.41380310058594, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 0.810243546962738, "rewards_train/1-l": -0.74471116065979, "rewards_train/1-w": 1.6887035369873047, "rewards_train/2-2": 1.4958336353302002, "rewards_train/2-w": 0.767994225025177, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4334146976470947, "rewards_train/margins_1": 0.8784599900245667, "rewards_train/margins_2": 0.7278394103050232, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -231.7054443359375, "logps_train/policy_1_l": -201.56178283691406, "logps_train/policy_1_w": -153.76126098632812, "logps_train/policy_2_2": -196.18316650390625, "logps_train/policy_2_w": -179.60293579101562, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.7138307094573975, "rewards_train/1-l": -1.4874272346496582, "rewards_train/1-w": 2.248873710632324, "rewards_train/2-2": 2.5160579681396484, "rewards_train/2-w": 1.5803303718566895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7363009452819824, "rewards_train/margins_1": 0.5350430011749268, "rewards_train/margins_2": 0.935727596282959, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -95.4141845703125, "logps_train/policy_1_l": -55.00850296020508, "logps_train/policy_1_w": -130.1402587890625, "logps_train/policy_2_2": -76.25537872314453, "logps_train/policy_2_w": -180.10247802734375, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -51.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.3163945972919464, "rewards_train/1-l": -0.39889752864837646, "rewards_train/1-w": 1.9242563247680664, "rewards_train/2-2": 0.42836833000183105, "rewards_train/2-w": 0.31787773966789246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.323153853416443, "rewards_train/margins_1": 1.60786172747612, "rewards_train/margins_2": 0.1104905903339386, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -180.064208984375, "logps_train/policy_1_l": -128.07882690429688, "logps_train/policy_1_w": -82.9780044555664, "logps_train/policy_2_2": -153.12254333496094, "logps_train/policy_2_w": -112.34746551513672, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.4467034339904785, "rewards_train/1-l": -1.2633512020111084, "rewards_train/1-w": 2.2529804706573486, "rewards_train/2-2": 2.2779805660247803, "rewards_train/2-w": 1.6371281147003174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.516331672668457, "rewards_train/margins_1": 0.8062770366668701, "rewards_train/margins_2": 0.6408524513244629, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -126.97048950195312, "logps_train/policy_1_l": -145.109619140625, "logps_train/policy_1_w": -140.10610961914062, "logps_train/policy_2_2": -92.76702880859375, "logps_train/policy_2_w": -185.25338745117188, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.1576387882232666, "rewards_train/1-l": -1.053382158279419, "rewards_train/1-w": 1.8690762519836426, "rewards_train/2-2": 1.8264219760894775, "rewards_train/2-w": 0.7418491840362549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9224584102630615, "rewards_train/margins_1": 0.711437463760376, "rewards_train/margins_2": 1.0845727920532227, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -218.54754638671875, "logps_train/policy_1_l": -229.92637634277344, "logps_train/policy_1_w": -209.53244018554688, "logps_train/policy_2_2": -175.1885528564453, "logps_train/policy_2_w": -250.3738555908203, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.6577454805374146, "rewards_train/1-l": -1.8340436220169067, "rewards_train/1-w": 2.703005075454712, "rewards_train/2-2": 3.0248942375183105, "rewards_train/2-w": 1.6688647270202637, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.537048697471619, "rewards_train/margins_1": 1.0452595949172974, "rewards_train/margins_2": 1.3560295104980469, "step": 138 }, { "epoch": 0.42, "logps_train/policy_1_2": -173.32699584960938, "logps_train/policy_1_l": -159.04026794433594, "logps_train/policy_1_w": -99.10044860839844, "logps_train/policy_2_2": -142.76202392578125, "logps_train/policy_2_w": -132.29763793945312, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1829264163970947, "rewards_train/1-l": -2.169651508331299, "rewards_train/1-w": 1.7774560451507568, "rewards_train/2-2": 1.7487969398498535, "rewards_train/2-w": 0.867112398147583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9471075534820557, "rewards_train/margins_1": 0.5945296287536621, "rewards_train/margins_2": 0.8816845417022705, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -126.88446044921875, "logps_train/policy_1_l": -136.99038696289062, "logps_train/policy_1_w": -107.13392639160156, "logps_train/policy_2_2": -104.60408020019531, "logps_train/policy_2_w": -127.87118530273438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.3678038120269775, "rewards_train/1-l": -1.0742336511611938, "rewards_train/1-w": 2.395982027053833, "rewards_train/2-2": 2.0005295276641846, "rewards_train/2-w": 1.9800689220428467, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.470215678215027, "rewards_train/margins_1": 1.0281782150268555, "rewards_train/margins_2": 0.02046060562133789, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -249.12350463867188, "logps_train/policy_1_l": -189.2698974609375, "logps_train/policy_1_w": -164.1431121826172, "logps_train/policy_2_2": -209.80906677246094, "logps_train/policy_2_w": -201.06594848632812, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -241.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.7931177616119385, "rewards_train/1-l": -2.323669672012329, "rewards_train/1-w": 2.790376663208008, "rewards_train/2-2": 3.1433119773864746, "rewards_train/2-w": 1.8449666500091553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.114046335220337, "rewards_train/margins_1": 0.9972589015960693, "rewards_train/margins_2": 1.2983453273773193, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -136.2803955078125, "logps_train/policy_1_l": -65.09109497070312, "logps_train/policy_1_w": -58.427528381347656, "logps_train/policy_2_2": -105.5010757446289, "logps_train/policy_2_w": -82.32896423339844, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -56.25, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.0172721147537231, "rewards_train/1-l": -0.8930939435958862, "rewards_train/1-w": 1.2652544975280762, "rewards_train/2-2": 1.9631738662719727, "rewards_train/2-w": 0.9850726127624512, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1583484411239624, "rewards_train/margins_1": 0.24798238277435303, "rewards_train/margins_2": 0.9781012535095215, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -171.66632080078125, "logps_train/policy_1_l": -244.73043823242188, "logps_train/policy_1_w": -87.9355697631836, "logps_train/policy_2_2": -135.09266662597656, "logps_train/policy_2_w": -124.03916931152344, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.5630563497543335, "rewards_train/1-l": -4.2355451583862305, "rewards_train/1-w": 1.2892556190490723, "rewards_train/2-2": 1.5001084804534912, "rewards_train/2-w": 0.6445202827453613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.524800777435303, "rewards_train/margins_1": 0.7261992692947388, "rewards_train/margins_2": 0.8555881977081299, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -79.01565551757812, "logps_train/policy_1_l": -66.94874572753906, "logps_train/policy_1_w": -72.54450988769531, "logps_train/policy_2_2": -69.69772338867188, "logps_train/policy_2_w": -84.92697143554688, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -59.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.9602510929107666, "rewards_train/1-l": -0.7677261829376221, "rewards_train/1-w": 1.4721113443374634, "rewards_train/2-2": 1.260891318321228, "rewards_train/2-w": 1.1953890323638916, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2398375272750854, "rewards_train/margins_1": 0.5118602514266968, "rewards_train/margins_2": 0.06550228595733643, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -129.27352905273438, "logps_train/policy_1_l": -186.64108276367188, "logps_train/policy_1_w": -142.47488403320312, "logps_train/policy_2_2": -96.97421264648438, "logps_train/policy_2_w": -183.82936096191406, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 0.7967686653137207, "rewards_train/1-l": -1.9040484428405762, "rewards_train/1-w": 1.8337610960006714, "rewards_train/2-2": 1.8058013916015625, "rewards_train/2-w": 0.9709697961807251, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7378095388412476, "rewards_train/margins_1": 1.0369924306869507, "rewards_train/margins_2": 0.8348315954208374, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -135.29843139648438, "logps_train/policy_1_l": -149.62918090820312, "logps_train/policy_1_w": -124.55543518066406, "logps_train/policy_2_2": -108.06495666503906, "logps_train/policy_2_w": -153.86453247070312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.45921939611434937, "rewards_train/1-l": -0.9855755567550659, "rewards_train/1-w": 2.0458245277404785, "rewards_train/2-2": 1.020066738128662, "rewards_train/2-w": 1.5545622110366821, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.0314000844955444, "rewards_train/margins_1": 1.5866051316261292, "rewards_train/margins_2": -0.53449547290802, "step": 139 }, { "epoch": 0.42, "learning_rate": 4.665063509461098e-06, "loss": 0.7915, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -128.096923828125, "logps_train/policy_1_l": -68.5382308959961, "logps_train/policy_1_w": -82.78025817871094, "logps_train/policy_2_2": -99.620849609375, "logps_train/policy_2_w": -116.92387390136719, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -60.25, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.34811916947364807, "rewards_train/1-l": -0.8224754333496094, "rewards_train/1-w": 1.3946301937103271, "rewards_train/2-2": 1.1418211460113525, "rewards_train/2-w": 0.6951130628585815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2171056270599365, "rewards_train/margins_1": 1.046511024236679, "rewards_train/margins_2": 0.446708083152771, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -95.33749389648438, "logps_train/policy_1_l": -80.06001281738281, "logps_train/policy_1_w": -75.4399642944336, "logps_train/policy_2_2": -77.01629638671875, "logps_train/policy_2_w": -103.9762191772461, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.1693753004074097, "rewards_train/1-l": -0.9166949391365051, "rewards_train/1-w": 0.9638155698776245, "rewards_train/2-2": 1.400714635848999, "rewards_train/2-w": 0.353940486907959, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8805105090141296, "rewards_train/margins_1": -0.20555973052978516, "rewards_train/margins_2": 1.04677414894104, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -149.4759979248047, "logps_train/policy_1_l": -77.98895263671875, "logps_train/policy_1_w": -86.63975524902344, "logps_train/policy_2_2": -119.27110290527344, "logps_train/policy_2_w": -110.40322875976562, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.5227124094963074, "rewards_train/1-l": -0.03170725703239441, "rewards_train/1-w": 1.2545795440673828, "rewards_train/2-2": 1.4502332210540771, "rewards_train/2-w": 0.5860441327095032, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.2862868010997772, "rewards_train/margins_1": 0.7318671345710754, "rewards_train/margins_2": 0.864189088344574, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -249.03060913085938, "logps_train/policy_1_l": -203.02719116210938, "logps_train/policy_1_w": -186.56280517578125, "logps_train/policy_2_2": -220.9759979248047, "logps_train/policy_2_w": -232.16085815429688, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 0.5531883239746094, "rewards_train/1-l": -1.500276803970337, "rewards_train/1-w": 2.75465726852417, "rewards_train/2-2": 1.460212230682373, "rewards_train/2-w": 1.458913803100586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.254934072494507, "rewards_train/margins_1": 2.2014689445495605, "rewards_train/margins_2": 0.0012984275817871094, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -219.54779052734375, "logps_train/policy_1_l": -264.27008056640625, "logps_train/policy_1_w": -152.41958618164062, "logps_train/policy_2_2": -185.47634887695312, "logps_train/policy_2_w": -185.12078857421875, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.4889724254608154, "rewards_train/1-l": -2.5996646881103516, "rewards_train/1-w": 2.2338218688964844, "rewards_train/2-2": 2.371115207672119, "rewards_train/2-w": 1.5174130201339722, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.833486557006836, "rewards_train/margins_1": 0.744849443435669, "rewards_train/margins_2": 0.853702187538147, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -132.95057678222656, "logps_train/policy_1_l": -127.63031768798828, "logps_train/policy_1_w": -177.23117065429688, "logps_train/policy_2_2": -102.69615936279297, "logps_train/policy_2_w": -201.91351318359375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 0.8072864413261414, "rewards_train/1-l": -1.2506294250488281, "rewards_train/1-w": 2.3776636123657227, "rewards_train/2-2": 1.9733526706695557, "rewards_train/2-w": 1.765679955482483, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.628293037414551, "rewards_train/margins_1": 1.5703771710395813, "rewards_train/margins_2": 0.20767271518707275, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -181.8683319091797, "logps_train/policy_1_l": -219.73013305664062, "logps_train/policy_1_w": -161.30422973632812, "logps_train/policy_2_2": -159.56973266601562, "logps_train/policy_2_w": -186.43475341796875, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.516291618347168, "rewards_train/1-l": -1.9628584384918213, "rewards_train/1-w": 2.5914506912231445, "rewards_train/2-2": 2.2539637088775635, "rewards_train/2-w": 1.66433584690094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.554309129714966, "rewards_train/margins_1": 1.0751590728759766, "rewards_train/margins_2": 0.5896278619766235, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -202.287841796875, "logps_train/policy_1_l": -195.99325561523438, "logps_train/policy_1_w": -174.81497192382812, "logps_train/policy_2_2": -163.131591796875, "logps_train/policy_2_w": -223.2511749267578, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.9751228094100952, "rewards_train/1-l": -1.7243252992630005, "rewards_train/1-w": 2.4841270446777344, "rewards_train/2-2": 2.5548102855682373, "rewards_train/2-w": 1.1592564582824707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.208452343940735, "rewards_train/margins_1": 0.5090042352676392, "rewards_train/margins_2": 1.3955538272857666, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -206.820556640625, "logps_train/policy_1_l": -176.84951782226562, "logps_train/policy_1_w": -152.3074493408203, "logps_train/policy_2_2": -169.95924377441406, "logps_train/policy_2_w": -180.01148986816406, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5757564306259155, "rewards_train/1-l": -1.0365149974822998, "rewards_train/1-w": 1.6786303520202637, "rewards_train/2-2": 2.6915764808654785, "rewards_train/2-w": 1.1207261085510254, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7151453495025635, "rewards_train/margins_1": 0.10287392139434814, "rewards_train/margins_2": 1.5708503723144531, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -180.2276611328125, "logps_train/policy_1_l": -154.92337036132812, "logps_train/policy_1_w": -134.565185546875, "logps_train/policy_2_2": -150.72586059570312, "logps_train/policy_2_w": -168.23794555664062, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.264392614364624, "rewards_train/1-l": -1.8119666576385498, "rewards_train/1-w": 2.281372308731079, "rewards_train/2-2": 2.086447238922119, "rewards_train/2-w": 1.353939175605774, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.093338966369629, "rewards_train/margins_1": 1.016979694366455, "rewards_train/margins_2": 0.7325080633163452, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -274.8487243652344, "logps_train/policy_1_l": -202.7144012451172, "logps_train/policy_1_w": -187.80435180664062, "logps_train/policy_2_2": -229.49546813964844, "logps_train/policy_2_w": -238.5733642578125, "logps_train/ref_1_2": -286.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -254.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.1916903257369995, "rewards_train/1-l": -1.0964394807815552, "rewards_train/1-w": 1.9297202825546265, "rewards_train/2-2": 2.3957653045654297, "rewards_train/2-w": 0.7129774689674377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0261597633361816, "rewards_train/margins_1": 0.738029956817627, "rewards_train/margins_2": 1.682787835597992, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -280.125244140625, "logps_train/policy_1_l": -192.87570190429688, "logps_train/policy_1_w": -185.1259765625, "logps_train/policy_2_2": -222.71812438964844, "logps_train/policy_2_w": -247.4193115234375, "logps_train/ref_1_2": -292.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.3312277793884277, "rewards_train/1-l": -1.0146483182907104, "rewards_train/1-w": 2.67529296875, "rewards_train/2-2": 3.2469375133514404, "rewards_train/2-w": 1.7479116916656494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6899412870407104, "rewards_train/margins_1": 1.3440651893615723, "rewards_train/margins_2": 1.499025821685791, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -139.25347900390625, "logps_train/policy_1_l": -257.7470397949219, "logps_train/policy_1_w": -136.9622344970703, "logps_train/policy_2_2": -107.56645202636719, "logps_train/policy_2_w": -186.5250701904297, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.5691828727722168, "rewards_train/1-l": -3.1740198135375977, "rewards_train/1-w": 2.9959640502929688, "rewards_train/2-2": 2.1359333992004395, "rewards_train/2-w": 1.7263983488082886, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.169983863830566, "rewards_train/margins_1": 1.426781177520752, "rewards_train/margins_2": 0.4095350503921509, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -133.8646697998047, "logps_train/policy_1_l": -130.34405517578125, "logps_train/policy_1_w": -131.25730895996094, "logps_train/policy_2_2": -109.07640075683594, "logps_train/policy_2_w": -165.7039031982422, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.8678297996520996, "rewards_train/1-l": -1.011749029159546, "rewards_train/1-w": 1.9883320331573486, "rewards_train/2-2": 1.462867259979248, "rewards_train/2-w": 1.1921093463897705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0000810623168945, "rewards_train/margins_1": 1.120502233505249, "rewards_train/margins_2": 0.27075791358947754, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -193.23768615722656, "logps_train/policy_1_l": -204.95864868164062, "logps_train/policy_1_w": -144.18463134765625, "logps_train/policy_2_2": -153.7297821044922, "logps_train/policy_2_w": -187.53402709960938, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.6809186935424805, "rewards_train/1-l": -2.5025060176849365, "rewards_train/1-w": 2.8248953819274902, "rewards_train/2-2": 2.5449907779693604, "rewards_train/2-w": 1.3594865798950195, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.327401399612427, "rewards_train/margins_1": 1.1439766883850098, "rewards_train/margins_2": 1.1855041980743408, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -169.61143493652344, "logps_train/policy_1_l": -218.23956298828125, "logps_train/policy_1_w": -216.86111450195312, "logps_train/policy_2_2": -149.2273406982422, "logps_train/policy_2_w": -243.0434112548828, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 2.287294626235962, "rewards_train/1-l": -1.5489554405212402, "rewards_train/1-w": 3.051387071609497, "rewards_train/2-2": 2.7022664546966553, "rewards_train/2-w": 1.9878462553024292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.600342512130737, "rewards_train/margins_1": 0.7640924453735352, "rewards_train/margins_2": 0.7144201993942261, "step": 141 }, { "epoch": 0.43, "learning_rate": 4.652609029418389e-06, "loss": 0.7976, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -210.27284240722656, "logps_train/policy_1_l": -194.39996337890625, "logps_train/policy_1_w": -128.48519897460938, "logps_train/policy_2_2": -184.00540161132812, "logps_train/policy_2_w": -156.88229370117188, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 2.088341474533081, "rewards_train/1-l": -1.9056215286254883, "rewards_train/1-w": 2.288980484008789, "rewards_train/2-2": 2.965085506439209, "rewards_train/2-w": 1.6992714405059814, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.194602012634277, "rewards_train/margins_1": 0.200639009475708, "rewards_train/margins_2": 1.2658140659332275, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -94.4801254272461, "logps_train/policy_1_l": -100.67315673828125, "logps_train/policy_1_w": -93.97142028808594, "logps_train/policy_2_2": -78.07032775878906, "logps_train/policy_2_w": -118.32560729980469, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.248862624168396, "rewards_train/1-l": -1.442583680152893, "rewards_train/1-w": 1.8887957334518433, "rewards_train/2-2": 1.5085922479629517, "rewards_train/2-w": 1.0818917751312256, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3313794136047363, "rewards_train/margins_1": 0.6399331092834473, "rewards_train/margins_2": 0.4267004728317261, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -115.6473159790039, "logps_train/policy_1_l": -111.88201141357422, "logps_train/policy_1_w": -104.2585220336914, "logps_train/policy_2_2": -90.26211547851562, "logps_train/policy_2_w": -132.74073791503906, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.6790184378623962, "rewards_train/1-l": -1.2995299100875854, "rewards_train/1-w": 1.7350856065750122, "rewards_train/2-2": 1.1761314868927002, "rewards_train/2-w": 0.725925624370575, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0346155166625977, "rewards_train/margins_1": 1.056067168712616, "rewards_train/margins_2": 0.45020586252212524, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -140.61634826660156, "logps_train/policy_1_l": -82.65795135498047, "logps_train/policy_1_w": -104.79485321044922, "logps_train/policy_2_2": -119.38045501708984, "logps_train/policy_2_w": -133.69512939453125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.2700055837631226, "rewards_train/1-l": -0.7868890166282654, "rewards_train/1-w": 2.0169990062713623, "rewards_train/2-2": 1.7325596809387207, "rewards_train/2-w": 1.1630053520202637, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8038880228996277, "rewards_train/margins_1": 0.7469934225082397, "rewards_train/margins_2": 0.569554328918457, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -123.60752868652344, "logps_train/policy_1_l": -70.25606536865234, "logps_train/policy_1_w": -101.70478057861328, "logps_train/policy_2_2": -104.00006103515625, "logps_train/policy_2_w": -129.66600036621094, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.463465929031372, "rewards_train/1-l": -0.5740442872047424, "rewards_train/1-w": 1.384990930557251, "rewards_train/2-2": 2.012493848800659, "rewards_train/2-w": 0.1005881130695343, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9590352177619934, "rewards_train/margins_1": -0.0784749984741211, "rewards_train/margins_2": 1.9119057357311249, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -119.4628677368164, "logps_train/policy_1_l": -100.97158813476562, "logps_train/policy_1_w": -144.76864624023438, "logps_train/policy_2_2": -99.59663391113281, "logps_train/policy_2_w": -163.92202758789062, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.9724631309509277, "rewards_train/1-l": -0.6297755241394043, "rewards_train/1-w": 1.4194247722625732, "rewards_train/2-2": 1.5411186218261719, "rewards_train/2-w": 0.9617036581039429, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.0492002964019775, "rewards_train/margins_1": 0.4469616413116455, "rewards_train/margins_2": 0.579414963722229, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -221.79653930664062, "logps_train/policy_1_l": -210.91995239257812, "logps_train/policy_1_w": -176.76263427734375, "logps_train/policy_2_2": -188.2534637451172, "logps_train/policy_2_w": -213.55328369140625, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.4140948057174683, "rewards_train/1-l": -1.384181022644043, "rewards_train/1-w": 2.6284244060516357, "rewards_train/2-2": 2.5965285301208496, "rewards_train/2-w": 1.5291448831558228, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.012605428695679, "rewards_train/margins_1": 1.2143296003341675, "rewards_train/margins_2": 1.0673836469650269, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -104.34872436523438, "logps_train/policy_1_l": -123.02958679199219, "logps_train/policy_1_w": -73.16991424560547, "logps_train/policy_2_2": -78.62108612060547, "logps_train/policy_2_w": -87.4560546875, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.452627182006836, "rewards_train/1-l": -1.7041304111480713, "rewards_train/1-w": 1.4513682126998901, "rewards_train/2-2": 1.8871103525161743, "rewards_train/2-w": 1.1883792877197266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1554986238479614, "rewards_train/margins_1": -0.0012589693069458008, "rewards_train/margins_2": 0.6987310647964478, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -129.57113647460938, "logps_train/policy_1_l": -193.4807891845703, "logps_train/policy_1_w": -130.04135131835938, "logps_train/policy_2_2": -102.42557525634766, "logps_train/policy_2_w": -160.04498291015625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.47726187109947205, "rewards_train/1-l": -2.5174145698547363, "rewards_train/1-w": 2.480238914489746, "rewards_train/2-2": 1.350411295890808, "rewards_train/2-w": 1.7486273050308228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 4.997653484344482, "rewards_train/margins_1": 2.002977043390274, "rewards_train/margins_2": -0.39821600914001465, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -132.11061096191406, "logps_train/policy_1_l": -77.65399169921875, "logps_train/policy_1_w": -51.03623962402344, "logps_train/policy_2_2": -98.80775451660156, "logps_train/policy_2_w": -72.51142883300781, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -66.0, "logps_train/ref_1_w": -63.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -81.0, "rewards_train/1-2": 0.8748767971992493, "rewards_train/1-l": -1.1677424907684326, "rewards_train/1-w": 1.2510638236999512, "rewards_train/2-2": 1.6961772441864014, "rewards_train/2-w": 0.8816696405410767, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.418806314468384, "rewards_train/margins_1": 0.3761870265007019, "rewards_train/margins_2": 0.8145076036453247, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -136.83253479003906, "logps_train/policy_1_l": -233.7415771484375, "logps_train/policy_1_w": -130.38168334960938, "logps_train/policy_2_2": -114.65098571777344, "logps_train/policy_2_w": -166.34312438964844, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.9800273180007935, "rewards_train/1-l": -2.75140380859375, "rewards_train/1-w": 2.424916982650757, "rewards_train/2-2": 1.5153698921203613, "rewards_train/2-w": 1.3480110168457031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.176320791244507, "rewards_train/margins_1": 1.4448896646499634, "rewards_train/margins_2": 0.1673588752746582, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -140.4508514404297, "logps_train/policy_1_l": -224.50025939941406, "logps_train/policy_1_w": -134.02064514160156, "logps_train/policy_2_2": -113.07013702392578, "logps_train/policy_2_w": -165.95138549804688, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.3549144268035889, "rewards_train/1-l": -1.8211205005645752, "rewards_train/1-w": 2.2381694316864014, "rewards_train/2-2": 2.1127126216888428, "rewards_train/2-w": 1.6587677001953125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.059289932250977, "rewards_train/margins_1": 0.8832550048828125, "rewards_train/margins_2": 0.4539449214935303, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -187.76698303222656, "logps_train/policy_1_l": -120.03652954101562, "logps_train/policy_1_w": -118.68157958984375, "logps_train/policy_2_2": -156.6324005126953, "logps_train/policy_2_w": -148.81040954589844, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.629551649093628, "rewards_train/1-l": -1.1614658832550049, "rewards_train/1-w": 2.1974663734436035, "rewards_train/2-2": 2.5320725440979004, "rewards_train/2-w": 1.17833411693573, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3589322566986084, "rewards_train/margins_1": 0.5679147243499756, "rewards_train/margins_2": 1.3537384271621704, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -208.56040954589844, "logps_train/policy_1_l": -143.9234619140625, "logps_train/policy_1_w": -128.3843231201172, "logps_train/policy_2_2": -179.60025024414062, "logps_train/policy_2_w": -157.475341796875, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.8720839023590088, "rewards_train/1-l": -1.0585579872131348, "rewards_train/1-w": 2.129927635192871, "rewards_train/2-2": 2.618100643157959, "rewards_train/2-w": 1.4079349040985107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.188485622406006, "rewards_train/margins_1": 0.2578437328338623, "rewards_train/margins_2": 1.2101657390594482, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -210.00074768066406, "logps_train/policy_1_l": -197.85098266601562, "logps_train/policy_1_w": -177.64878845214844, "logps_train/policy_2_2": -165.50448608398438, "logps_train/policy_2_w": -230.11395263671875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.9905502796173096, "rewards_train/1-l": -1.589003324508667, "rewards_train/1-w": 2.5491838455200195, "rewards_train/2-2": 3.2276763916015625, "rewards_train/2-w": 0.8589171171188354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1381871700286865, "rewards_train/margins_1": 0.55863356590271, "rewards_train/margins_2": 2.368759274482727, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -209.00411987304688, "logps_train/policy_1_l": -199.35157775878906, "logps_train/policy_1_w": -169.84542846679688, "logps_train/policy_2_2": -179.29885864257812, "logps_train/policy_2_w": -196.1619110107422, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.8808389902114868, "rewards_train/1-l": -2.330275058746338, "rewards_train/1-w": 2.643582344055176, "rewards_train/2-2": 2.30898118019104, "rewards_train/2-w": 1.8242394924163818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.973857402801514, "rewards_train/margins_1": 0.762743353843689, "rewards_train/margins_2": 0.4847416877746582, "step": 143 }, { "epoch": 0.43, "learning_rate": 4.6399444585292266e-06, "loss": 0.7991, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -145.58828735351562, "logps_train/policy_1_l": -234.9217071533203, "logps_train/policy_1_w": -120.8150863647461, "logps_train/policy_2_2": -129.45089721679688, "logps_train/policy_2_w": -145.10647583007812, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3208577632904053, "rewards_train/1-l": -3.6843581199645996, "rewards_train/1-w": 2.1388039588928223, "rewards_train/2-2": 1.9892865419387817, "rewards_train/2-w": 1.5581027269363403, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.823162078857422, "rewards_train/margins_1": 0.817946195602417, "rewards_train/margins_2": 0.4311838150024414, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -100.54367065429688, "logps_train/policy_1_l": -116.68690490722656, "logps_train/policy_1_w": -153.9390106201172, "logps_train/policy_2_2": -74.7512435913086, "logps_train/policy_2_w": -190.9424591064453, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.0440703630447388, "rewards_train/1-l": -0.9866600036621094, "rewards_train/1-w": 2.2260208129882812, "rewards_train/2-2": 1.6740944385528564, "rewards_train/2-w": 0.8196215033531189, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2126808166503906, "rewards_train/margins_1": 1.1819504499435425, "rewards_train/margins_2": 0.8544729351997375, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -95.79635620117188, "logps_train/policy_1_l": -99.1131591796875, "logps_train/policy_1_w": -108.77494049072266, "logps_train/policy_2_2": -85.20710754394531, "logps_train/policy_2_w": -128.84999084472656, "logps_train/ref_1_2": -110.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.4758332967758179, "rewards_train/1-l": -0.4988161027431488, "rewards_train/1-w": 1.8021938800811768, "rewards_train/2-2": 1.642570972442627, "rewards_train/2-w": 1.2556259632110596, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3010099828243256, "rewards_train/margins_1": 0.3263605833053589, "rewards_train/margins_2": 0.3869450092315674, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -177.86154174804688, "logps_train/policy_1_l": -183.3309783935547, "logps_train/policy_1_w": -129.6320343017578, "logps_train/policy_2_2": -141.40725708007812, "logps_train/policy_2_w": -153.08982849121094, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.1904069185256958, "rewards_train/1-l": -1.5909109115600586, "rewards_train/1-w": 1.5141398906707764, "rewards_train/2-2": 2.319822072982788, "rewards_train/2-w": 1.2300796508789062, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.105050802230835, "rewards_train/margins_1": 0.32373297214508057, "rewards_train/margins_2": 1.0897424221038818, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -141.03163146972656, "logps_train/policy_1_l": -106.90132141113281, "logps_train/policy_1_w": -75.75007629394531, "logps_train/policy_2_2": -107.18801879882812, "logps_train/policy_2_w": -102.17231750488281, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 0.733555793762207, "rewards_train/1-l": -1.0846635103225708, "rewards_train/1-w": 1.385148525238037, "rewards_train/2-2": 1.2983858585357666, "rewards_train/2-w": 0.3319876194000244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.469812035560608, "rewards_train/margins_1": 0.6515927314758301, "rewards_train/margins_2": 0.9663982391357422, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -182.05125427246094, "logps_train/policy_1_l": -176.8021697998047, "logps_train/policy_1_w": -155.409423828125, "logps_train/policy_2_2": -141.21966552734375, "logps_train/policy_2_w": -204.9949951171875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.6808125972747803, "rewards_train/1-l": -0.9991625547409058, "rewards_train/1-w": 2.2168705463409424, "rewards_train/2-2": 2.4655327796936035, "rewards_train/2-w": 1.089563012123108, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.216033101081848, "rewards_train/margins_1": 0.5360579490661621, "rewards_train/margins_2": 1.3759697675704956, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -185.9879608154297, "logps_train/policy_1_l": -177.82904052734375, "logps_train/policy_1_w": -137.69589233398438, "logps_train/policy_2_2": -152.77130126953125, "logps_train/policy_2_w": -166.9241943359375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.9773764610290527, "rewards_train/1-l": -1.7055609226226807, "rewards_train/1-w": 2.226505756378174, "rewards_train/2-2": 3.164666175842285, "rewards_train/2-w": 1.2669540643692017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9320666790008545, "rewards_train/margins_1": 0.2491292953491211, "rewards_train/margins_2": 1.8977121114730835, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -60.15114974975586, "logps_train/policy_1_l": -54.98298645019531, "logps_train/policy_1_w": -44.5943489074707, "logps_train/policy_2_2": -49.17317581176758, "logps_train/policy_2_w": -53.839515686035156, "logps_train/ref_1_2": -66.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -55.5, "logps_train/ref_2_2": -57.5, "logps_train/ref_2_w": -61.0, "rewards_train/1-2": 0.6528536081314087, "rewards_train/1-l": -0.3067457973957062, "rewards_train/1-w": 1.1030653715133667, "rewards_train/2-2": 0.8295574188232422, "rewards_train/2-w": 0.6965171694755554, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.4098111689090729, "rewards_train/margins_1": 0.450211763381958, "rewards_train/margins_2": 0.13304024934768677, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -197.6431884765625, "logps_train/policy_1_l": -195.80250549316406, "logps_train/policy_1_w": -150.18505859375, "logps_train/policy_2_2": -161.9310302734375, "logps_train/policy_2_w": -193.2724609375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.8388066291809082, "rewards_train/1-l": -1.9865000247955322, "rewards_train/1-w": 2.0877437591552734, "rewards_train/2-2": 2.660022020339966, "rewards_train/2-w": 0.9633793234825134, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.074243783950806, "rewards_train/margins_1": 0.24893712997436523, "rewards_train/margins_2": 1.6966426968574524, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -207.76734924316406, "logps_train/policy_1_l": -108.27617645263672, "logps_train/policy_1_w": -103.68252563476562, "logps_train/policy_2_2": -163.2886962890625, "logps_train/policy_2_w": -131.634765625, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.4857655763626099, "rewards_train/1-l": -0.9166799783706665, "rewards_train/1-w": 1.6951758861541748, "rewards_train/2-2": 2.603943109512329, "rewards_train/2-w": 0.9668946266174316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6118558645248413, "rewards_train/margins_1": 0.20941030979156494, "rewards_train/margins_2": 1.6370484828948975, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -204.42001342773438, "logps_train/policy_1_l": -196.85418701171875, "logps_train/policy_1_w": -197.71665954589844, "logps_train/policy_2_2": -176.57781982421875, "logps_train/policy_2_w": -223.68267822265625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.895498514175415, "rewards_train/1-l": -2.0135436058044434, "rewards_train/1-w": 2.284583330154419, "rewards_train/2-2": 2.848468780517578, "rewards_train/2-w": 1.3567315340042114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.298126935958862, "rewards_train/margins_1": 0.3890848159790039, "rewards_train/margins_2": 1.4917372465133667, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -176.132080078125, "logps_train/policy_1_l": -192.27549743652344, "logps_train/policy_1_w": -148.58544921875, "logps_train/policy_2_2": -143.69129943847656, "logps_train/policy_2_w": -183.8464813232422, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.2996821403503418, "rewards_train/1-l": -1.9175902605056763, "rewards_train/1-w": 1.9664556980133057, "rewards_train/2-2": 2.0185651779174805, "rewards_train/2-w": 1.177851915359497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.884045958518982, "rewards_train/margins_1": 0.6667735576629639, "rewards_train/margins_2": 0.8407132625579834, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -123.8727035522461, "logps_train/policy_1_l": -160.58984375, "logps_train/policy_1_w": -111.97605895996094, "logps_train/policy_2_2": -106.62550354003906, "logps_train/policy_2_w": -134.2374267578125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.610385775566101, "rewards_train/1-l": -1.4068119525909424, "rewards_train/1-w": 1.8969252109527588, "rewards_train/2-2": 1.9311997890472412, "rewards_train/2-w": 1.462976098060608, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.303737163543701, "rewards_train/margins_1": 0.2865394353866577, "rewards_train/margins_2": 0.4682236909866333, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -195.17715454101562, "logps_train/policy_1_l": -185.46498107910156, "logps_train/policy_1_w": -123.93072509765625, "logps_train/policy_2_2": -162.81060791015625, "logps_train/policy_2_w": -151.30117797851562, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.2135343551635742, "rewards_train/1-l": -1.2957167625427246, "rewards_train/1-w": 2.10341215133667, "rewards_train/2-2": 1.9283148050308228, "rewards_train/2-w": 1.2679293155670166, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3991289138793945, "rewards_train/margins_1": 0.8898777961730957, "rewards_train/margins_2": 0.6603854894638062, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -119.38001251220703, "logps_train/policy_1_l": -135.35922241210938, "logps_train/policy_1_w": -68.56321716308594, "logps_train/policy_2_2": -98.16561889648438, "logps_train/policy_2_w": -87.05850219726562, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.4057488441467285, "rewards_train/1-l": -1.2665866613388062, "rewards_train/1-w": 0.9298110604286194, "rewards_train/2-2": 1.8838284015655518, "rewards_train/2-w": 0.5804779529571533, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1963977217674255, "rewards_train/margins_1": -0.47593778371810913, "rewards_train/margins_2": 1.3033504486083984, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -87.47537231445312, "logps_train/policy_1_l": -79.73609924316406, "logps_train/policy_1_w": -83.6766128540039, "logps_train/policy_2_2": -74.12554931640625, "logps_train/policy_2_w": -97.54432678222656, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -110.5, "rewards_train/1-2": 1.4915255308151245, "rewards_train/1-l": -0.5130630731582642, "rewards_train/1-w": 1.774135947227478, "rewards_train/2-2": 1.6507267951965332, "rewards_train/2-w": 1.2760355472564697, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.287199020385742, "rewards_train/margins_1": 0.2826104164123535, "rewards_train/margins_2": 0.3746912479400635, "step": 145 }, { "epoch": 0.44, "learning_rate": 4.627071032833401e-06, "loss": 0.8278, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -185.724609375, "logps_train/policy_1_l": -204.30209350585938, "logps_train/policy_1_w": -141.86932373046875, "logps_train/policy_2_2": -144.6829833984375, "logps_train/policy_2_w": -188.17288208007812, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.0416011810302734, "rewards_train/1-l": -1.4981765747070312, "rewards_train/1-w": 2.5419745445251465, "rewards_train/2-2": 1.889514684677124, "rewards_train/2-w": 1.406148910522461, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.040151119232178, "rewards_train/margins_1": 1.500373363494873, "rewards_train/margins_2": 0.4833657741546631, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -87.1810073852539, "logps_train/policy_1_l": -74.09324645996094, "logps_train/policy_1_w": -50.188270568847656, "logps_train/policy_2_2": -72.79922485351562, "logps_train/policy_2_w": -74.59761047363281, "logps_train/ref_1_2": -94.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -62.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.7307270169258118, "rewards_train/1-l": -0.2499494105577469, "rewards_train/1-w": 1.2139854431152344, "rewards_train/2-2": 1.227182149887085, "rewards_train/2-w": 0.7980512380599976, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.4639348536729813, "rewards_train/margins_1": 0.4832584261894226, "rewards_train/margins_2": 0.4291309118270874, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -104.54098510742188, "logps_train/policy_1_l": -78.44412231445312, "logps_train/policy_1_w": -98.0048828125, "logps_train/policy_2_2": -84.60362243652344, "logps_train/policy_2_w": -127.82289123535156, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.7927768230438232, "rewards_train/1-l": -0.399880588054657, "rewards_train/1-w": 1.453418254852295, "rewards_train/2-2": 1.3701061010360718, "rewards_train/2-w": 0.8247424364089966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.853298842906952, "rewards_train/margins_1": 0.6606414318084717, "rewards_train/margins_2": 0.5453636646270752, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -178.9688262939453, "logps_train/policy_1_l": -247.31884765625, "logps_train/policy_1_w": -161.40078735351562, "logps_train/policy_2_2": -152.95965576171875, "logps_train/policy_2_w": -193.7652130126953, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.0624920129776, "rewards_train/1-l": -1.2838400602340698, "rewards_train/1-w": 1.3942970037460327, "rewards_train/2-2": 1.4729304313659668, "rewards_train/2-w": 0.6453538537025452, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6781370639801025, "rewards_train/margins_1": 0.3318049907684326, "rewards_train/margins_2": 0.8275765776634216, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -90.75365447998047, "logps_train/policy_1_l": -98.67845153808594, "logps_train/policy_1_w": -44.147499084472656, "logps_train/policy_2_2": -72.30509185791016, "logps_train/policy_2_w": -57.735618591308594, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -50.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -62.0, "rewards_train/1-2": 0.7074471712112427, "rewards_train/1-l": -0.6098858118057251, "rewards_train/1-w": 0.6061973571777344, "rewards_train/2-2": 1.2069909572601318, "rewards_train/2-w": 0.4147196412086487, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.2160831689834595, "rewards_train/margins_1": -0.1012498140335083, "rewards_train/margins_2": 0.7922713160514832, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -100.06086730957031, "logps_train/policy_1_l": -73.11076354980469, "logps_train/policy_1_w": -71.07275390625, "logps_train/policy_2_2": -86.47725677490234, "logps_train/policy_2_w": -81.36368560791016, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 1.3861002922058105, "rewards_train/1-l": -0.6008468866348267, "rewards_train/1-w": 1.7548344135284424, "rewards_train/2-2": 1.7585240602493286, "rewards_train/2-w": 1.5077229738235474, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.355681300163269, "rewards_train/margins_1": 0.36873412132263184, "rewards_train/margins_2": 0.25080108642578125, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -288.79803466796875, "logps_train/policy_1_l": -205.58401489257812, "logps_train/policy_1_w": -247.7029266357422, "logps_train/policy_2_2": -241.03851318359375, "logps_train/policy_2_w": -299.88653564453125, "logps_train/ref_1_2": -304.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -288.0, "logps_train/ref_2_2": -276.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 1.529569387435913, "rewards_train/1-l": -1.8746123313903809, "rewards_train/1-w": 4.017207145690918, "rewards_train/2-2": 3.3523998260498047, "rewards_train/2-w": 2.3988442420959473, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.891819477081299, "rewards_train/margins_1": 2.487637758255005, "rewards_train/margins_2": 0.9535555839538574, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -189.08518981933594, "logps_train/policy_1_l": -183.3954620361328, "logps_train/policy_1_w": -116.13167572021484, "logps_train/policy_2_2": -158.12721252441406, "logps_train/policy_2_w": -143.135986328125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.1977314949035645, "rewards_train/1-l": -2.187983512878418, "rewards_train/1-w": 2.0606601238250732, "rewards_train/2-2": 2.1372792720794678, "rewards_train/2-w": 1.4629632234573364, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.248643636703491, "rewards_train/margins_1": 0.8629286289215088, "rewards_train/margins_2": 0.6743160486221313, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -141.6620330810547, "logps_train/policy_1_l": -200.3751983642578, "logps_train/policy_1_w": -174.37060546875, "logps_train/policy_2_2": -120.38465881347656, "logps_train/policy_2_w": -199.78805541992188, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.758796215057373, "rewards_train/1-l": -2.0390825271606445, "rewards_train/1-w": 2.53481388092041, "rewards_train/2-2": 2.426378011703491, "rewards_train/2-w": 1.8649442195892334, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.573896408081055, "rewards_train/margins_1": 0.7760176658630371, "rewards_train/margins_2": 0.5614337921142578, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -225.94833374023438, "logps_train/policy_1_l": -198.17263793945312, "logps_train/policy_1_w": -216.51950073242188, "logps_train/policy_2_2": -193.5822296142578, "logps_train/policy_2_w": -250.83834838867188, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -270.0, "rewards_train/1-2": 2.348917245864868, "rewards_train/1-l": -1.4211698770523071, "rewards_train/1-w": 2.8027381896972656, "rewards_train/2-2": 3.049199104309082, "rewards_train/2-w": 1.8849148750305176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.223908066749573, "rewards_train/margins_1": 0.45382094383239746, "rewards_train/margins_2": 1.1642842292785645, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -255.07456970214844, "logps_train/policy_1_l": -251.24839782714844, "logps_train/policy_1_w": -183.64447021484375, "logps_train/policy_2_2": -219.87020874023438, "logps_train/policy_2_w": -230.55618286132812, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 1.2464497089385986, "rewards_train/1-l": -1.8264014720916748, "rewards_train/1-w": 2.6996140480041504, "rewards_train/2-2": 2.7184481620788574, "rewards_train/2-w": 1.43109929561615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.526015520095825, "rewards_train/margins_1": 1.4531643390655518, "rewards_train/margins_2": 1.2873488664627075, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -105.66319274902344, "logps_train/policy_1_l": -87.0450439453125, "logps_train/policy_1_w": -95.36615753173828, "logps_train/policy_2_2": -87.16339111328125, "logps_train/policy_2_w": -118.54238891601562, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.5633680820465088, "rewards_train/1-l": -0.47559866309165955, "rewards_train/1-w": 1.3887749910354614, "rewards_train/2-2": 2.0125672817230225, "rewards_train/2-w": 0.6061124801635742, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.864373654127121, "rewards_train/margins_1": -0.17459309101104736, "rewards_train/margins_2": 1.4064548015594482, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -126.39746856689453, "logps_train/policy_1_l": -156.3414306640625, "logps_train/policy_1_w": -124.4324951171875, "logps_train/policy_2_2": -105.416259765625, "logps_train/policy_2_w": -154.8345184326172, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.0922844409942627, "rewards_train/1-l": -1.4890263080596924, "rewards_train/1-w": 1.666515588760376, "rewards_train/2-2": 1.6122806072235107, "rewards_train/2-w": 0.9087364077568054, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1555418968200684, "rewards_train/margins_1": 0.5742311477661133, "rewards_train/margins_2": 0.7035441994667053, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -165.10531616210938, "logps_train/policy_1_l": -167.6697235107422, "logps_train/policy_1_w": -87.75637817382812, "logps_train/policy_2_2": -130.95974731445312, "logps_train/policy_2_w": -115.31436157226562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 0.8769690990447998, "rewards_train/1-l": -1.9058399200439453, "rewards_train/1-w": 1.633737564086914, "rewards_train/2-2": 1.7635955810546875, "rewards_train/2-w": 1.0185635089874268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5395774841308594, "rewards_train/margins_1": 0.7567684650421143, "rewards_train/margins_2": 0.7450320720672607, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -86.38310241699219, "logps_train/policy_1_l": -95.09147644042969, "logps_train/policy_1_w": -70.47626495361328, "logps_train/policy_2_2": -73.5090103149414, "logps_train/policy_2_w": -89.93238830566406, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 0.9165717959403992, "rewards_train/1-l": -0.7782882452011108, "rewards_train/1-w": 1.1996393203735352, "rewards_train/2-2": 1.2432639598846436, "rewards_train/2-w": 0.7071514129638672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.977927565574646, "rewards_train/margins_1": 0.283067524433136, "rewards_train/margins_2": 0.5361125469207764, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -207.196533203125, "logps_train/policy_1_l": -162.60284423828125, "logps_train/policy_1_w": -195.12847900390625, "logps_train/policy_2_2": -179.42446899414062, "logps_train/policy_2_w": -229.29351806640625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.044018268585205, "rewards_train/1-l": -1.5700483322143555, "rewards_train/1-w": 2.819964647293091, "rewards_train/2-2": 3.1610686779022217, "rewards_train/2-w": 1.8534600734710693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.390012979507446, "rewards_train/margins_1": 0.7759463787078857, "rewards_train/margins_2": 1.3076086044311523, "step": 147 }, { "epoch": 0.44, "learning_rate": 4.613990008754565e-06, "loss": 0.8436, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -122.54252624511719, "logps_train/policy_1_l": -176.25210571289062, "logps_train/policy_1_w": -125.1658935546875, "logps_train/policy_2_2": -99.94547271728516, "logps_train/policy_2_w": -160.34295654296875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.8621543645858765, "rewards_train/1-l": -1.8654438257217407, "rewards_train/1-w": 1.1287237405776978, "rewards_train/2-2": 1.3624842166900635, "rewards_train/2-w": 0.5547669529914856, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9941675662994385, "rewards_train/margins_1": 0.2665693759918213, "rewards_train/margins_2": 0.8077172636985779, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -185.2732391357422, "logps_train/policy_1_l": -172.82608032226562, "logps_train/policy_1_w": -198.39199829101562, "logps_train/policy_2_2": -146.14418029785156, "logps_train/policy_2_w": -244.24424743652344, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.2523633241653442, "rewards_train/1-l": -1.1732324361801147, "rewards_train/1-w": 3.520176649093628, "rewards_train/2-2": 2.077770233154297, "rewards_train/2-w": 2.281825065612793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.693409085273743, "rewards_train/margins_1": 2.2678133249282837, "rewards_train/margins_2": -0.2040548324584961, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -113.65678405761719, "logps_train/policy_1_l": -96.26216125488281, "logps_train/policy_1_w": -83.70829010009766, "logps_train/policy_2_2": -89.81330108642578, "logps_train/policy_2_w": -109.03504943847656, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.6358845829963684, "rewards_train/1-l": -1.5031695365905762, "rewards_train/1-w": 1.5492392778396606, "rewards_train/2-2": 1.4999198913574219, "rewards_train/2-w": 0.8785264492034912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.052408814430237, "rewards_train/margins_1": 0.9133546948432922, "rewards_train/margins_2": 0.6213934421539307, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -183.48046875, "logps_train/policy_1_l": -143.6934814453125, "logps_train/policy_1_w": -250.07452392578125, "logps_train/policy_2_2": -159.64439392089844, "logps_train/policy_2_w": -285.0208435058594, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -284.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 1.4988276958465576, "rewards_train/1-l": -0.7216918468475342, "rewards_train/1-w": 3.3820018768310547, "rewards_train/2-2": 2.2066538333892822, "rewards_train/2-w": 2.3756508827209473, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.103693723678589, "rewards_train/margins_1": 1.883174180984497, "rewards_train/margins_2": -0.16899704933166504, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -131.18418884277344, "logps_train/policy_1_l": -149.08355712890625, "logps_train/policy_1_w": -101.40250396728516, "logps_train/policy_2_2": -103.01152801513672, "logps_train/policy_2_w": -119.95040130615234, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.838612675666809, "rewards_train/1-l": -1.6372612714767456, "rewards_train/1-w": 2.1152184009552, "rewards_train/2-2": 2.471503496170044, "rewards_train/2-w": 1.5999795198440552, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.752479672431946, "rewards_train/margins_1": 0.2766057252883911, "rewards_train/margins_2": 0.8715239763259888, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -247.7533721923828, "logps_train/policy_1_l": -250.44137573242188, "logps_train/policy_1_w": -171.28781127929688, "logps_train/policy_2_2": -213.14736938476562, "logps_train/policy_2_w": -215.08139038085938, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.102005958557129, "rewards_train/1-l": -1.683786153793335, "rewards_train/1-w": 2.49621844291687, "rewards_train/2-2": 2.1669023036956787, "rewards_train/2-w": 1.2504560947418213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.180004596710205, "rewards_train/margins_1": 1.3942124843597412, "rewards_train/margins_2": 0.9164462089538574, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -171.9235382080078, "logps_train/policy_1_l": -156.91983032226562, "logps_train/policy_1_w": -95.45750427246094, "logps_train/policy_2_2": -151.24835205078125, "logps_train/policy_2_w": -114.61488342285156, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.7724895477294922, "rewards_train/1-l": -1.7943272590637207, "rewards_train/1-w": 1.2155776023864746, "rewards_train/2-2": 2.360321044921875, "rewards_train/2-w": 0.8728874921798706, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0099048614501953, "rewards_train/margins_1": -0.5569119453430176, "rewards_train/margins_2": 1.4874335527420044, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -100.51436614990234, "logps_train/policy_1_l": -101.36231994628906, "logps_train/policy_1_w": -88.09420776367188, "logps_train/policy_2_2": -73.04679107666016, "logps_train/policy_2_w": -113.14608764648438, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.0954384803771973, "rewards_train/1-l": -1.1760756969451904, "rewards_train/1-w": 1.7093291282653809, "rewards_train/2-2": 1.7531334161758423, "rewards_train/2-w": 1.0853909254074097, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8854048252105713, "rewards_train/margins_1": 0.6138906478881836, "rewards_train/margins_2": 0.6677424907684326, "step": 148 }, { "epoch": 0.45, "logps_train/policy_1_2": -157.82748413085938, "logps_train/policy_1_l": -182.3674774169922, "logps_train/policy_1_w": -142.0294952392578, "logps_train/policy_2_2": -130.64422607421875, "logps_train/policy_2_w": -175.65350341796875, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.5110028982162476, "rewards_train/1-l": -1.043778657913208, "rewards_train/1-w": 2.4931445121765137, "rewards_train/2-2": 2.269951820373535, "rewards_train/2-w": 1.5596494674682617, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5369231700897217, "rewards_train/margins_1": 0.9821416139602661, "rewards_train/margins_2": 0.7103023529052734, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -107.72991180419922, "logps_train/policy_1_l": -100.86932373046875, "logps_train/policy_1_w": -78.2927017211914, "logps_train/policy_2_2": -88.23210144042969, "logps_train/policy_2_w": -100.1648178100586, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.9622626304626465, "rewards_train/1-l": -1.4283392429351807, "rewards_train/1-w": 2.305690288543701, "rewards_train/2-2": 1.5931963920593262, "rewards_train/2-w": 1.614768147468567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.734029531478882, "rewards_train/margins_1": 1.3434276580810547, "rewards_train/margins_2": -0.021571755409240723, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -130.66680908203125, "logps_train/policy_1_l": -132.03146362304688, "logps_train/policy_1_w": -112.39049530029297, "logps_train/policy_2_2": -110.83654022216797, "logps_train/policy_2_w": -137.48196411132812, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.2778496742248535, "rewards_train/1-l": -1.923067331314087, "rewards_train/1-w": 1.5750126838684082, "rewards_train/2-2": 2.1423230171203613, "rewards_train/2-w": 0.6900848150253296, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.498080015182495, "rewards_train/margins_1": 0.2971630096435547, "rewards_train/margins_2": 1.4522382020950317, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -173.5074005126953, "logps_train/policy_1_l": -225.42449951171875, "logps_train/policy_1_w": -196.250244140625, "logps_train/policy_2_2": -140.1063232421875, "logps_train/policy_2_w": -240.677490234375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.096135139465332, "rewards_train/1-l": -2.3338561058044434, "rewards_train/1-w": 2.707787036895752, "rewards_train/2-2": 2.832336902618408, "rewards_train/2-w": 1.3260016441345215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.041643142700195, "rewards_train/margins_1": 0.6116518974304199, "rewards_train/margins_2": 1.5063352584838867, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -142.01202392578125, "logps_train/policy_1_l": -168.5655517578125, "logps_train/policy_1_w": -129.78128051757812, "logps_train/policy_2_2": -123.06318664550781, "logps_train/policy_2_w": -164.2438507080078, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.6558287143707275, "rewards_train/1-l": -2.0157337188720703, "rewards_train/1-w": 2.8249969482421875, "rewards_train/2-2": 2.3151659965515137, "rewards_train/2-w": 1.6896774768829346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.840730667114258, "rewards_train/margins_1": 1.16916823387146, "rewards_train/margins_2": 0.6254885196685791, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -259.0760803222656, "logps_train/policy_1_l": -219.39630126953125, "logps_train/policy_1_w": -190.16806030273438, "logps_train/policy_2_2": -223.37783813476562, "logps_train/policy_2_w": -223.83743286132812, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 1.7986416816711426, "rewards_train/1-l": -2.22713041305542, "rewards_train/1-w": 2.6902263164520264, "rewards_train/2-2": 2.9622161388397217, "rewards_train/2-w": 1.8896936178207397, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.917356729507446, "rewards_train/margins_1": 0.8915846347808838, "rewards_train/margins_2": 1.072522521018982, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -101.13951110839844, "logps_train/policy_1_l": -73.16752624511719, "logps_train/policy_1_w": -95.59660339355469, "logps_train/policy_2_2": -70.39103698730469, "logps_train/policy_2_w": -137.78533935546875, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.1172990798950195, "rewards_train/1-l": -0.3468312919139862, "rewards_train/1-w": 2.0747146606445312, "rewards_train/2-2": 1.5655839443206787, "rewards_train/2-w": 0.8199034929275513, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.4215459525585175, "rewards_train/margins_1": 0.9574155807495117, "rewards_train/margins_2": 0.7456804513931274, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -144.39764404296875, "logps_train/policy_1_l": -145.6185760498047, "logps_train/policy_1_w": -124.37630462646484, "logps_train/policy_2_2": -109.68161010742188, "logps_train/policy_2_w": -145.88369750976562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.6430485248565674, "rewards_train/1-l": -1.769670844078064, "rewards_train/1-w": 2.1014318466186523, "rewards_train/2-2": 2.812307357788086, "rewards_train/2-w": 1.5241295099258423, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8711026906967163, "rewards_train/margins_1": 0.45838332176208496, "rewards_train/margins_2": 1.2881778478622437, "step": 149 }, { "epoch": 0.45, "learning_rate": 4.600702662977611e-06, "loss": 0.7787, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -125.32676696777344, "logps_train/policy_1_l": -87.86895751953125, "logps_train/policy_1_w": -81.97280883789062, "logps_train/policy_2_2": -99.46870422363281, "logps_train/policy_2_w": -106.82292175292969, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.9705941677093506, "rewards_train/1-l": -0.9906553030014038, "rewards_train/1-w": 1.2341156005859375, "rewards_train/2-2": 1.688382863998413, "rewards_train/2-w": 0.7990561723709106, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2247709035873413, "rewards_train/margins_1": 0.2635214328765869, "rewards_train/margins_2": 0.8893266916275024, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -135.0525665283203, "logps_train/policy_1_l": -187.96788024902344, "logps_train/policy_1_w": -156.93653869628906, "logps_train/policy_2_2": -104.92201232910156, "logps_train/policy_2_w": -200.003173828125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.8463057279586792, "rewards_train/1-l": -2.231163501739502, "rewards_train/1-w": 3.089158296585083, "rewards_train/2-2": 2.2218613624572754, "rewards_train/2-w": 1.349682331085205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.320321798324585, "rewards_train/margins_1": 1.2428525686264038, "rewards_train/margins_2": 0.8721790313720703, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -145.8889617919922, "logps_train/policy_1_l": -176.98257446289062, "logps_train/policy_1_w": -140.6083526611328, "logps_train/policy_2_2": -107.14704895019531, "logps_train/policy_2_w": -195.05776977539062, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.161104679107666, "rewards_train/1-l": -1.7013821601867676, "rewards_train/1-w": 3.0766654014587402, "rewards_train/2-2": 1.8423264026641846, "rewards_train/2-w": 1.1379730701446533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.778047561645508, "rewards_train/margins_1": 1.9155607223510742, "rewards_train/margins_2": 0.7043533325195312, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -261.87890625, "logps_train/policy_1_l": -198.26651000976562, "logps_train/policy_1_w": -148.63214111328125, "logps_train/policy_2_2": -222.2598876953125, "logps_train/policy_2_w": -178.21856689453125, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -254.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.7433595657348633, "rewards_train/1-l": -1.5368080139160156, "rewards_train/1-w": 2.42741060256958, "rewards_train/2-2": 3.061511993408203, "rewards_train/2-w": 1.7515807151794434, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9642186164855957, "rewards_train/margins_1": 0.6840510368347168, "rewards_train/margins_2": 1.3099312782287598, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -204.89068603515625, "logps_train/policy_1_l": -255.76394653320312, "logps_train/policy_1_w": -240.25875854492188, "logps_train/policy_2_2": -178.4888458251953, "logps_train/policy_2_w": -269.0613708496094, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.9953057765960693, "rewards_train/1-l": -2.063894033432007, "rewards_train/1-w": 2.1690452098846436, "rewards_train/2-2": 2.4011149406433105, "rewards_train/2-w": 1.1790189743041992, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.23293924331665, "rewards_train/margins_1": 0.17373943328857422, "rewards_train/margins_2": 1.2220959663391113, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -161.46588134765625, "logps_train/policy_1_l": -183.0956573486328, "logps_train/policy_1_w": -180.31971740722656, "logps_train/policy_2_2": -134.51034545898438, "logps_train/policy_2_w": -215.72610473632812, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.026458978652954, "rewards_train/1-l": -1.1306602954864502, "rewards_train/1-w": 2.025841236114502, "rewards_train/2-2": 1.8118566274642944, "rewards_train/2-w": 0.98442143201828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.156501531600952, "rewards_train/margins_1": 0.9993822574615479, "rewards_train/margins_2": 0.8274351954460144, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -201.97988891601562, "logps_train/policy_1_l": -125.49345397949219, "logps_train/policy_1_w": -117.60503387451172, "logps_train/policy_2_2": -171.85751342773438, "logps_train/policy_2_w": -142.84974670410156, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.7238856554031372, "rewards_train/1-l": -0.9567672610282898, "rewards_train/1-w": 2.206489086151123, "rewards_train/2-2": 2.8361239433288574, "rewards_train/2-w": 1.1716660261154175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.163256347179413, "rewards_train/margins_1": 0.48260343074798584, "rewards_train/margins_2": 1.66445791721344, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -130.07424926757812, "logps_train/policy_1_l": -123.4989013671875, "logps_train/policy_1_w": -86.64342498779297, "logps_train/policy_2_2": -104.361572265625, "logps_train/policy_2_w": -112.88249206542969, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.3160121440887451, "rewards_train/1-l": -1.4459840059280396, "rewards_train/1-w": 1.4922983646392822, "rewards_train/2-2": 1.9122800827026367, "rewards_train/2-w": 0.8258135318756104, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9382823705673218, "rewards_train/margins_1": 0.1762862205505371, "rewards_train/margins_2": 1.0864665508270264, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -160.76492309570312, "logps_train/policy_1_l": -188.88296508789062, "logps_train/policy_1_w": -122.69500732421875, "logps_train/policy_2_2": -131.51641845703125, "logps_train/policy_2_w": -148.45504760742188, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.214133381843567, "rewards_train/1-l": -1.4664208889007568, "rewards_train/1-w": 1.583624243736267, "rewards_train/2-2": 2.068671226501465, "rewards_train/2-w": 0.9365259408950806, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.050045132637024, "rewards_train/margins_1": 0.3694908618927002, "rewards_train/margins_2": 1.1321452856063843, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -196.7083740234375, "logps_train/policy_1_l": -217.18545532226562, "logps_train/policy_1_w": -130.906494140625, "logps_train/policy_2_2": -156.355712890625, "logps_train/policy_2_w": -169.42578125, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.2791622877120972, "rewards_train/1-l": -1.8497974872589111, "rewards_train/1-w": 2.4499752521514893, "rewards_train/2-2": 2.4519286155700684, "rewards_train/2-w": 1.6074225902557373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.2997727394104, "rewards_train/margins_1": 1.170812964439392, "rewards_train/margins_2": 0.844506025314331, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -152.388671875, "logps_train/policy_1_l": -119.01004791259766, "logps_train/policy_1_w": -127.12567138671875, "logps_train/policy_2_2": -130.44371032714844, "logps_train/policy_2_w": -143.5745391845703, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3080081939697266, "rewards_train/1-l": -1.1228797435760498, "rewards_train/1-w": 2.0124332904815674, "rewards_train/2-2": 2.183753490447998, "rewards_train/2-w": 1.6394202709197998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.135313034057617, "rewards_train/margins_1": 0.7044250965118408, "rewards_train/margins_2": 0.5443332195281982, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -121.9334716796875, "logps_train/policy_1_l": -121.79763793945312, "logps_train/policy_1_w": -77.35479736328125, "logps_train/policy_2_2": -82.89564514160156, "logps_train/policy_2_w": -100.13825225830078, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 1.4613404273986816, "rewards_train/1-l": -1.2287873029708862, "rewards_train/1-w": 1.1399108171463013, "rewards_train/2-2": 2.377622604370117, "rewards_train/2-w": 0.6560965776443481, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.3686981201171875, "rewards_train/margins_1": -0.32142961025238037, "rewards_train/margins_2": 1.721526026725769, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -104.87425231933594, "logps_train/policy_1_l": -123.12786865234375, "logps_train/policy_1_w": -126.0218734741211, "logps_train/policy_2_2": -89.01374816894531, "logps_train/policy_2_w": -141.98828125, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.970948338508606, "rewards_train/1-l": -0.952264666557312, "rewards_train/1-w": 1.4400001764297485, "rewards_train/2-2": 1.0914957523345947, "rewards_train/2-w": 0.7167966961860657, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3922648429870605, "rewards_train/margins_1": 0.4690518379211426, "rewards_train/margins_2": 0.37469905614852905, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -89.30014038085938, "logps_train/policy_1_l": -72.78245544433594, "logps_train/policy_1_w": -109.37797546386719, "logps_train/policy_2_2": -70.51571655273438, "logps_train/policy_2_w": -140.35260009765625, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2199859619140625, "rewards_train/1-l": -0.25793302059173584, "rewards_train/1-w": 1.5504837036132812, "rewards_train/2-2": 1.6749906539916992, "rewards_train/2-w": 0.774115264415741, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.808416724205017, "rewards_train/margins_1": 0.33049774169921875, "rewards_train/margins_2": 0.9008753895759583, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -119.70875549316406, "logps_train/policy_1_l": -107.0631103515625, "logps_train/policy_1_w": -95.83480072021484, "logps_train/policy_2_2": -93.2539291381836, "logps_train/policy_2_w": -120.42410278320312, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.9900620579719543, "rewards_train/1-l": -1.432970643043518, "rewards_train/1-w": 2.5083165168762207, "rewards_train/2-2": 1.966794490814209, "rewards_train/2-w": 1.516965389251709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9412871599197388, "rewards_train/margins_1": 1.5182544589042664, "rewards_train/margins_2": 0.4498291015625, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -172.18980407714844, "logps_train/policy_1_l": -192.04742431640625, "logps_train/policy_1_w": -134.658203125, "logps_train/policy_2_2": -141.75018310546875, "logps_train/policy_2_w": -172.48712158203125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.0888320207595825, "rewards_train/1-l": -1.8043506145477295, "rewards_train/1-w": 2.484179973602295, "rewards_train/2-2": 2.0058412551879883, "rewards_train/2-w": 1.2762863636016846, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.288530588150024, "rewards_train/margins_1": 1.3953479528427124, "rewards_train/margins_2": 0.7295548915863037, "step": 151 }, { "epoch": 0.46, "learning_rate": 4.587210292324062e-06, "loss": 0.7545, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -166.67819213867188, "logps_train/policy_1_l": -186.46441650390625, "logps_train/policy_1_w": -182.00808715820312, "logps_train/policy_2_2": -146.66490173339844, "logps_train/policy_2_w": -207.78701782226562, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 2.3665566444396973, "rewards_train/1-l": -1.3206603527069092, "rewards_train/1-w": 3.5648155212402344, "rewards_train/2-2": 3.043079376220703, "rewards_train/2-w": 2.6900482177734375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.8854758739471436, "rewards_train/margins_1": 1.198258876800537, "rewards_train/margins_2": 0.3530311584472656, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -96.86993408203125, "logps_train/policy_1_l": -140.8533935546875, "logps_train/policy_1_w": -90.2175064086914, "logps_train/policy_2_2": -75.87228393554688, "logps_train/policy_2_w": -118.02813720703125, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.0519709587097168, "rewards_train/1-l": -1.7761601209640503, "rewards_train/1-w": 1.5259064435958862, "rewards_train/2-2": 1.5539827346801758, "rewards_train/2-w": 0.6948428153991699, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3020665645599365, "rewards_train/margins_1": 0.47393548488616943, "rewards_train/margins_2": 0.8591399192810059, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -101.21246337890625, "logps_train/policy_1_l": -130.25201416015625, "logps_train/policy_1_w": -44.58198547363281, "logps_train/policy_2_2": -79.0448989868164, "logps_train/policy_2_w": -57.729000091552734, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -57.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -66.5, "rewards_train/1-2": 1.011565923690796, "rewards_train/1-l": -0.8287176489830017, "rewards_train/1-w": 1.248246669769287, "rewards_train/2-2": 1.698635458946228, "rewards_train/2-w": 0.8798341751098633, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.076964318752289, "rewards_train/margins_1": 0.2366807460784912, "rewards_train/margins_2": 0.8188012838363647, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -96.41224670410156, "logps_train/policy_1_l": -126.99327087402344, "logps_train/policy_1_w": -102.20492553710938, "logps_train/policy_2_2": -79.55667114257812, "logps_train/policy_2_w": -120.35616302490234, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.3322126865386963, "rewards_train/1-l": -0.9519637823104858, "rewards_train/1-w": 1.5295078754425049, "rewards_train/2-2": 1.474020004272461, "rewards_train/2-w": 0.9565708637237549, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4814716577529907, "rewards_train/margins_1": 0.1972951889038086, "rewards_train/margins_2": 0.517449140548706, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -161.87393188476562, "logps_train/policy_1_l": -202.9439239501953, "logps_train/policy_1_w": -143.77398681640625, "logps_train/policy_2_2": -116.8982925415039, "logps_train/policy_2_w": -197.3389129638672, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.6469804048538208, "rewards_train/1-l": -1.100642442703247, "rewards_train/1-w": 2.2913525104522705, "rewards_train/2-2": 2.750795364379883, "rewards_train/2-w": 0.906733512878418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3919949531555176, "rewards_train/margins_1": 0.6443721055984497, "rewards_train/margins_2": 1.8440618515014648, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -201.98582458496094, "logps_train/policy_1_l": -176.1741943359375, "logps_train/policy_1_w": -156.60325622558594, "logps_train/policy_2_2": -157.989501953125, "logps_train/policy_2_w": -193.18710327148438, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.1654797792434692, "rewards_train/1-l": -1.5514034032821655, "rewards_train/1-w": 2.8138928413391113, "rewards_train/2-2": 2.4963624477386475, "rewards_train/2-w": 1.8664450645446777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.365296244621277, "rewards_train/margins_1": 1.648413062095642, "rewards_train/margins_2": 0.6299173831939697, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -213.29840087890625, "logps_train/policy_1_l": -275.44573974609375, "logps_train/policy_1_w": -133.42132568359375, "logps_train/policy_2_2": -183.74795532226562, "logps_train/policy_2_w": -168.30859375, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -252.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.4857832193374634, "rewards_train/1-l": -2.3758256435394287, "rewards_train/1-w": 2.6250548362731934, "rewards_train/2-2": 2.378329038619995, "rewards_train/2-w": 1.7441399097442627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.000880479812622, "rewards_train/margins_1": 1.13927161693573, "rewards_train/margins_2": 0.6341891288757324, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -134.5628662109375, "logps_train/policy_1_l": -186.41390991210938, "logps_train/policy_1_w": -116.41429138183594, "logps_train/policy_2_2": -112.15824890136719, "logps_train/policy_2_w": -134.96478271484375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.871447205543518, "rewards_train/1-l": -1.5257664918899536, "rewards_train/1-w": 2.0947036743164062, "rewards_train/2-2": 2.379878044128418, "rewards_train/2-w": 1.5597718954086304, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.62047016620636, "rewards_train/margins_1": 0.22325646877288818, "rewards_train/margins_2": 0.8201061487197876, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -224.3734130859375, "logps_train/policy_1_l": -209.18482971191406, "logps_train/policy_1_w": -167.17100524902344, "logps_train/policy_2_2": -202.23533630371094, "logps_train/policy_2_w": -191.8656005859375, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.9767217636108398, "rewards_train/1-l": -1.8016853332519531, "rewards_train/1-w": 1.9703994989395142, "rewards_train/2-2": 2.7592787742614746, "rewards_train/2-w": 1.3821887969970703, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7720848321914673, "rewards_train/margins_1": -0.006322264671325684, "rewards_train/margins_2": 1.3770899772644043, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -204.73312377929688, "logps_train/policy_1_l": -145.53431701660156, "logps_train/policy_1_w": -106.68836975097656, "logps_train/policy_2_2": -164.1028289794922, "logps_train/policy_2_w": -134.3052520751953, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.1411405801773071, "rewards_train/1-l": -1.6856590509414673, "rewards_train/1-w": 1.8985453844070435, "rewards_train/2-2": 2.2059273719787598, "rewards_train/2-w": 0.9284586906433105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5842044353485107, "rewards_train/margins_1": 0.7574048042297363, "rewards_train/margins_2": 1.2774686813354492, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -93.35203552246094, "logps_train/policy_1_l": -132.31906127929688, "logps_train/policy_1_w": -112.77737426757812, "logps_train/policy_2_2": -75.7546615600586, "logps_train/policy_2_w": -135.87240600585938, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2214369773864746, "rewards_train/1-l": -1.2065401077270508, "rewards_train/1-w": 2.339449405670166, "rewards_train/2-2": 1.6182842254638672, "rewards_train/2-w": 1.5986974239349365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.545989513397217, "rewards_train/margins_1": 1.1180124282836914, "rewards_train/margins_2": 0.019586801528930664, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -200.52171325683594, "logps_train/policy_1_l": -154.2669677734375, "logps_train/policy_1_w": -105.26126861572266, "logps_train/policy_2_2": -169.6749267578125, "logps_train/policy_2_w": -128.8925018310547, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.5626720190048218, "rewards_train/1-l": -1.309339165687561, "rewards_train/1-w": 1.8144984245300293, "rewards_train/2-2": 2.5973517894744873, "rewards_train/2-w": 1.2669999599456787, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1238375902175903, "rewards_train/margins_1": 0.2518264055252075, "rewards_train/margins_2": 1.3303518295288086, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -161.27694702148438, "logps_train/policy_1_l": -154.8472900390625, "logps_train/policy_1_w": -139.16888427734375, "logps_train/policy_2_2": -141.9837188720703, "logps_train/policy_2_w": -153.63604736328125, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.539492130279541, "rewards_train/1-l": -1.724571943283081, "rewards_train/1-w": 1.9737361669540405, "rewards_train/2-2": 2.1891281604766846, "rewards_train/2-w": 1.5098319053649902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6983081102371216, "rewards_train/margins_1": 0.4342440366744995, "rewards_train/margins_2": 0.6792962551116943, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -222.01214599609375, "logps_train/policy_1_l": -219.62484741210938, "logps_train/policy_1_w": -138.61550903320312, "logps_train/policy_2_2": -177.57501220703125, "logps_train/policy_2_w": -176.10130310058594, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.8612849712371826, "rewards_train/1-l": -2.6542811393737793, "rewards_train/1-w": 2.5005593299865723, "rewards_train/2-2": 2.9331231117248535, "rewards_train/2-w": 1.4398695230484009, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.154840469360352, "rewards_train/margins_1": 0.6392743587493896, "rewards_train/margins_2": 1.4932535886764526, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -100.80987548828125, "logps_train/policy_1_l": -115.26265716552734, "logps_train/policy_1_w": -135.43141174316406, "logps_train/policy_2_2": -81.06861877441406, "logps_train/policy_2_w": -170.84619140625, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.6153011322021484, "rewards_train/1-l": -1.0206016302108765, "rewards_train/1-w": 2.2342026233673096, "rewards_train/2-2": 1.9685289859771729, "rewards_train/2-w": 0.41538023948669434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.254804253578186, "rewards_train/margins_1": 0.6189014911651611, "rewards_train/margins_2": 1.5531487464904785, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -82.8018798828125, "logps_train/policy_1_l": -104.8249740600586, "logps_train/policy_1_w": -152.39431762695312, "logps_train/policy_2_2": -69.63330078125, "logps_train/policy_2_w": -179.90138244628906, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.7291873693466187, "rewards_train/1-l": -1.2824976444244385, "rewards_train/1-w": 2.3332250118255615, "rewards_train/2-2": 1.0858886241912842, "rewards_train/2-w": 1.3895487785339355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.61572265625, "rewards_train/margins_1": 1.6040376424789429, "rewards_train/margins_2": -0.30366015434265137, "step": 153 }, { "epoch": 0.46, "learning_rate": 4.573514213625505e-06, "loss": 0.7713, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -186.04823303222656, "logps_train/policy_1_l": -241.8809051513672, "logps_train/policy_1_w": -153.69869995117188, "logps_train/policy_2_2": -151.1987762451172, "logps_train/policy_2_w": -188.64877319335938, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.6404893398284912, "rewards_train/1-l": -1.523247241973877, "rewards_train/1-w": 2.3043484687805176, "rewards_train/2-2": 2.334028720855713, "rewards_train/2-w": 1.024186611175537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8275957107543945, "rewards_train/margins_1": 0.6638591289520264, "rewards_train/margins_2": 1.3098421096801758, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -223.61358642578125, "logps_train/policy_1_l": -195.34954833984375, "logps_train/policy_1_w": -130.69952392578125, "logps_train/policy_2_2": -191.4303436279297, "logps_train/policy_2_w": -169.68789672851562, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.8316118717193604, "rewards_train/1-l": -2.2865161895751953, "rewards_train/1-w": 2.0847344398498535, "rewards_train/2-2": 2.9241533279418945, "rewards_train/2-w": 1.2827738523483276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.371250629425049, "rewards_train/margins_1": 0.25312256813049316, "rewards_train/margins_2": 1.641379475593567, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -169.33627319335938, "logps_train/policy_1_l": -188.31973266601562, "logps_train/policy_1_w": -142.0492706298828, "logps_train/policy_2_2": -142.70846557617188, "logps_train/policy_2_w": -176.12391662597656, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.3023098707199097, "rewards_train/1-l": -1.7854398488998413, "rewards_train/1-w": 2.237651824951172, "rewards_train/2-2": 2.0572776794433594, "rewards_train/2-w": 1.2965917587280273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.023091673851013, "rewards_train/margins_1": 0.9353419542312622, "rewards_train/margins_2": 0.760685920715332, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -224.5548858642578, "logps_train/policy_1_l": -155.70770263671875, "logps_train/policy_1_w": -199.54356384277344, "logps_train/policy_2_2": -194.498291015625, "logps_train/policy_2_w": -229.80328369140625, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.809159755706787, "rewards_train/1-l": -1.0764343738555908, "rewards_train/1-w": 1.9056048393249512, "rewards_train/2-2": 2.8124754428863525, "rewards_train/2-w": 1.0022884607315063, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.982039213180542, "rewards_train/margins_1": 0.09644508361816406, "rewards_train/margins_2": 1.8101869821548462, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -183.70584106445312, "logps_train/policy_1_l": -292.6693115234375, "logps_train/policy_1_w": -167.119873046875, "logps_train/policy_2_2": -150.1529083251953, "logps_train/policy_2_w": -209.64285278320312, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.2372289896011353, "rewards_train/1-l": -3.0294294357299805, "rewards_train/1-w": 2.4442625045776367, "rewards_train/2-2": 2.169865608215332, "rewards_train/2-w": 1.5005594491958618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.473691940307617, "rewards_train/margins_1": 1.2070335149765015, "rewards_train/margins_2": 0.6693061590194702, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -156.2918243408203, "logps_train/policy_1_l": -121.74365997314453, "logps_train/policy_1_w": -77.12916564941406, "logps_train/policy_2_2": -125.51017761230469, "logps_train/policy_2_w": -96.10438537597656, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.8098793625831604, "rewards_train/1-l": -1.5287609100341797, "rewards_train/1-w": 1.1849358081817627, "rewards_train/2-2": 1.9950761795043945, "rewards_train/2-w": 0.7087024450302124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7136967182159424, "rewards_train/margins_1": 0.3750564455986023, "rewards_train/margins_2": 1.2863737344741821, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -146.08604431152344, "logps_train/policy_1_l": -109.05152893066406, "logps_train/policy_1_w": -141.71902465820312, "logps_train/policy_2_2": -113.507080078125, "logps_train/policy_2_w": -158.3028564453125, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.8773329257965088, "rewards_train/1-l": -0.29655909538269043, "rewards_train/1-w": 1.8521220684051514, "rewards_train/2-2": 1.7055420875549316, "rewards_train/2-w": 1.5540902614593506, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.148681163787842, "rewards_train/margins_1": 0.9747891426086426, "rewards_train/margins_2": 0.15145182609558105, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -215.56500244140625, "logps_train/policy_1_l": -178.89381408691406, "logps_train/policy_1_w": -169.8843994140625, "logps_train/policy_2_2": -175.6660919189453, "logps_train/policy_2_w": -213.61123657226562, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.2450635433197021, "rewards_train/1-l": -1.155396580696106, "rewards_train/1-w": 3.381873846054077, "rewards_train/2-2": 2.122452974319458, "rewards_train/2-w": 1.504500389099121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.537270426750183, "rewards_train/margins_1": 2.136810302734375, "rewards_train/margins_2": 0.6179525852203369, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -170.01718139648438, "logps_train/policy_1_l": -160.67724609375, "logps_train/policy_1_w": -93.97364807128906, "logps_train/policy_2_2": -138.30593872070312, "logps_train/policy_2_w": -120.01176452636719, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.6607825756072998, "rewards_train/1-l": -2.4481940269470215, "rewards_train/1-w": 1.2885723114013672, "rewards_train/2-2": 2.644406795501709, "rewards_train/2-w": 0.5910110473632812, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7367663383483887, "rewards_train/margins_1": -0.3722102642059326, "rewards_train/margins_2": 2.0533957481384277, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -103.99004364013672, "logps_train/policy_1_l": -68.93663787841797, "logps_train/policy_1_w": -56.57551574707031, "logps_train/policy_2_2": -85.44688415527344, "logps_train/policy_2_w": -69.4495620727539, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -62.0, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -77.0, "rewards_train/1-2": 1.5588078498840332, "rewards_train/1-l": -0.6768669486045837, "rewards_train/1-w": 1.0627611875534058, "rewards_train/2-2": 1.8131235837936401, "rewards_train/2-w": 0.7253560423851013, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7396281361579895, "rewards_train/margins_1": -0.49604666233062744, "rewards_train/margins_2": 1.0877675414085388, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -149.28001403808594, "logps_train/policy_1_l": -156.31964111328125, "logps_train/policy_1_w": -151.0857696533203, "logps_train/policy_2_2": -128.2563018798828, "logps_train/policy_2_w": -176.7455596923828, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.4001234769821167, "rewards_train/1-l": -0.9651670455932617, "rewards_train/1-w": 2.7289233207702637, "rewards_train/2-2": 1.8681204319000244, "rewards_train/2-w": 2.044194221496582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6940903663635254, "rewards_train/margins_1": 1.328799843788147, "rewards_train/margins_2": -0.17607378959655762, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -155.96383666992188, "logps_train/policy_1_l": -127.49224090576172, "logps_train/policy_1_w": -104.03761291503906, "logps_train/policy_2_2": -138.14862060546875, "logps_train/policy_2_w": -123.4021224975586, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.6120138168334961, "rewards_train/1-l": -1.212749719619751, "rewards_train/1-w": 1.5712385177612305, "rewards_train/2-2": 1.260138988494873, "rewards_train/2-w": 1.0261942148208618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7839882373809814, "rewards_train/margins_1": 0.9592247009277344, "rewards_train/margins_2": 0.23394477367401123, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -99.00621032714844, "logps_train/policy_1_l": -96.43904876708984, "logps_train/policy_1_w": -137.94041442871094, "logps_train/policy_2_2": -81.68246459960938, "logps_train/policy_2_w": -167.6324462890625, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.286879062652588, "rewards_train/1-l": -0.5336998701095581, "rewards_train/1-w": 1.3774911165237427, "rewards_train/2-2": 1.3083165884017944, "rewards_train/2-w": 0.5305044651031494, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9111909866333008, "rewards_train/margins_1": 0.09061205387115479, "rewards_train/margins_2": 0.777812123298645, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -206.16676330566406, "logps_train/policy_1_l": -144.83364868164062, "logps_train/policy_1_w": -127.57844543457031, "logps_train/policy_2_2": -167.56610107421875, "logps_train/policy_2_w": -169.25851440429688, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.5926986932754517, "rewards_train/1-l": -1.2514808177947998, "rewards_train/1-w": 2.5412282943725586, "rewards_train/2-2": 2.813701868057251, "rewards_train/2-w": 1.6549098491668701, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7927091121673584, "rewards_train/margins_1": 0.9485296010971069, "rewards_train/margins_2": 1.1587920188903809, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -181.08450317382812, "logps_train/policy_1_l": -186.48580932617188, "logps_train/policy_1_w": -148.3385009765625, "logps_train/policy_2_2": -145.51834106445312, "logps_train/policy_2_w": -181.94253540039062, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.1571745872497559, "rewards_train/1-l": -2.6022915840148926, "rewards_train/1-w": 2.40521240234375, "rewards_train/2-2": 2.376291513442993, "rewards_train/2-w": 1.3768404722213745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.007503986358643, "rewards_train/margins_1": 1.2480378150939941, "rewards_train/margins_2": 0.9994510412216187, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -121.27251434326172, "logps_train/policy_1_l": -131.48175048828125, "logps_train/policy_1_w": -130.7602996826172, "logps_train/policy_2_2": -100.48713684082031, "logps_train/policy_2_w": -165.55416870117188, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 0.9864199161529541, "rewards_train/1-l": -1.1123355627059937, "rewards_train/1-w": 2.3841261863708496, "rewards_train/2-2": 1.2876147031784058, "rewards_train/2-w": 1.5641149282455444, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4964617490768433, "rewards_train/margins_1": 1.3977062702178955, "rewards_train/margins_2": -0.27650022506713867, "step": 155 }, { "epoch": 0.47, "learning_rate": 4.55961576359508e-06, "loss": 0.8379, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -142.8141326904297, "logps_train/policy_1_l": -140.65774536132812, "logps_train/policy_1_w": -123.59485626220703, "logps_train/policy_2_2": -115.54965209960938, "logps_train/policy_2_w": -158.8345947265625, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.1236648559570312, "rewards_train/1-l": -1.6962432861328125, "rewards_train/1-w": 2.1266469955444336, "rewards_train/2-2": 2.1995272636413574, "rewards_train/2-w": 1.1138057708740234, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.822890281677246, "rewards_train/margins_1": 1.0029821395874023, "rewards_train/margins_2": 1.085721492767334, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -175.01315307617188, "logps_train/policy_1_l": -150.3134765625, "logps_train/policy_1_w": -104.65744018554688, "logps_train/policy_2_2": -137.95449829101562, "logps_train/policy_2_w": -142.8477783203125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.5830610990524292, "rewards_train/1-l": -2.2237303256988525, "rewards_train/1-w": 1.8342567682266235, "rewards_train/2-2": 2.007675886154175, "rewards_train/2-w": 0.7183467745780945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.057987093925476, "rewards_train/margins_1": 1.2511956691741943, "rewards_train/margins_2": 1.2893291115760803, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -150.141357421875, "logps_train/policy_1_l": -142.94009399414062, "logps_train/policy_1_w": -110.00680541992188, "logps_train/policy_2_2": -101.67739868164062, "logps_train/policy_2_w": -162.46792602539062, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.3585209846496582, "rewards_train/1-l": -1.1479161977767944, "rewards_train/1-w": 2.470803737640381, "rewards_train/2-2": 2.268198013305664, "rewards_train/2-w": 1.399301528930664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6187199354171753, "rewards_train/margins_1": 1.1122827529907227, "rewards_train/margins_2": 0.868896484375, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -182.35134887695312, "logps_train/policy_1_l": -177.45086669921875, "logps_train/policy_1_w": -133.07794189453125, "logps_train/policy_2_2": -143.72323608398438, "logps_train/policy_2_w": -182.00144958496094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.5898648500442505, "rewards_train/1-l": -1.2247744798660278, "rewards_train/1-w": 1.8672049045562744, "rewards_train/2-2": 2.5620529651641846, "rewards_train/2-w": 1.2436046600341797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0919793844223022, "rewards_train/margins_1": 0.2773400545120239, "rewards_train/margins_2": 1.3184483051300049, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -218.86331176757812, "logps_train/policy_1_l": -182.6890869140625, "logps_train/policy_1_w": -103.27911376953125, "logps_train/policy_2_2": -186.2790985107422, "logps_train/policy_2_w": -127.41500854492188, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.9644491672515869, "rewards_train/1-l": -1.6829707622528076, "rewards_train/1-w": 1.9119319915771484, "rewards_train/2-2": 2.0314652919769287, "rewards_train/2-w": 1.31553053855896, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.594902753829956, "rewards_train/margins_1": 0.9474828243255615, "rewards_train/margins_2": 0.7159347534179688, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -141.55641174316406, "logps_train/policy_1_l": -150.88909912109375, "logps_train/policy_1_w": -164.8995361328125, "logps_train/policy_2_2": -115.0450439453125, "logps_train/policy_2_w": -198.03485107421875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.152561902999878, "rewards_train/1-l": -1.002777338027954, "rewards_train/1-w": 2.32645320892334, "rewards_train/2-2": 1.8997920751571655, "rewards_train/2-w": 1.4926084280014038, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.329230546951294, "rewards_train/margins_1": 1.173891305923462, "rewards_train/margins_2": 0.4071836471557617, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -144.01596069335938, "logps_train/policy_1_l": -122.48249053955078, "logps_train/policy_1_w": -138.68540954589844, "logps_train/policy_2_2": -126.41092681884766, "logps_train/policy_2_w": -164.65394592285156, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.8706691265106201, "rewards_train/1-l": -0.3427801728248596, "rewards_train/1-w": 2.0963027477264404, "rewards_train/2-2": 1.6143759489059448, "rewards_train/2-w": 1.5963245630264282, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.4390829205513, "rewards_train/margins_1": 1.2256336212158203, "rewards_train/margins_2": 0.0180513858795166, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -153.4357147216797, "logps_train/policy_1_l": -90.08296203613281, "logps_train/policy_1_w": -118.02848815917969, "logps_train/policy_2_2": -120.25519561767578, "logps_train/policy_2_w": -151.82037353515625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.1298656463623047, "rewards_train/1-l": -0.6416462659835815, "rewards_train/1-w": 1.4869953393936157, "rewards_train/2-2": 1.8807302713394165, "rewards_train/2-w": 0.3507743179798126, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.1286416053771973, "rewards_train/margins_1": 0.35712969303131104, "rewards_train/margins_2": 1.5299559533596039, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -199.09124755859375, "logps_train/policy_1_l": -113.76144409179688, "logps_train/policy_1_w": -140.08322143554688, "logps_train/policy_2_2": -158.32571411132812, "logps_train/policy_2_w": -177.6183624267578, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 0.9721241593360901, "rewards_train/1-l": -0.6490200757980347, "rewards_train/1-w": 2.304568290710449, "rewards_train/2-2": 2.4010231494903564, "rewards_train/2-w": 1.3416796922683716, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.953588366508484, "rewards_train/margins_1": 1.3324441313743591, "rewards_train/margins_2": 1.0593434572219849, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -143.38360595703125, "logps_train/policy_1_l": -232.888427734375, "logps_train/policy_1_w": -180.69686889648438, "logps_train/policy_2_2": -117.31578063964844, "logps_train/policy_2_w": -227.78976440429688, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.8835134506225586, "rewards_train/1-l": -1.0825936794281006, "rewards_train/1-w": 1.970936894416809, "rewards_train/2-2": 2.5465474128723145, "rewards_train/2-w": 0.36477458477020264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0535305738449097, "rewards_train/margins_1": 0.08742344379425049, "rewards_train/margins_2": 2.181772828102112, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -191.03021240234375, "logps_train/policy_1_l": -127.71499633789062, "logps_train/policy_1_w": -85.4739990234375, "logps_train/policy_2_2": -168.102294921875, "logps_train/policy_2_w": -101.50119018554688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": 1.3235406875610352, "rewards_train/1-l": -1.0199365615844727, "rewards_train/1-w": 1.566272258758545, "rewards_train/2-2": 1.9819574356079102, "rewards_train/2-w": 1.1959751844406128, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5862088203430176, "rewards_train/margins_1": 0.24273157119750977, "rewards_train/margins_2": 0.7859822511672974, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -164.94479370117188, "logps_train/policy_1_l": -222.9640350341797, "logps_train/policy_1_w": -116.4776611328125, "logps_train/policy_2_2": -142.78390502929688, "logps_train/policy_2_w": -134.44552612304688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.25473952293396, "rewards_train/1-l": -2.898552417755127, "rewards_train/1-w": 1.9885625839233398, "rewards_train/2-2": 1.8829368352890015, "rewards_train/2-w": 1.2632598876953125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.887115001678467, "rewards_train/margins_1": 0.7338230609893799, "rewards_train/margins_2": 0.619676947593689, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -136.3054962158203, "logps_train/policy_1_l": -123.59547424316406, "logps_train/policy_1_w": -124.5924072265625, "logps_train/policy_2_2": -101.34669494628906, "logps_train/policy_2_w": -154.90724182128906, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.16476312279701233, "rewards_train/1-l": -0.9189223647117615, "rewards_train/1-w": 1.7774779796600342, "rewards_train/2-2": 1.5489237308502197, "rewards_train/2-w": 0.99833744764328, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6964003443717957, "rewards_train/margins_1": 1.6127148568630219, "rewards_train/margins_2": 0.5505862832069397, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -206.2291717529297, "logps_train/policy_1_l": -207.6658172607422, "logps_train/policy_1_w": -221.75555419921875, "logps_train/policy_2_2": -169.16372680664062, "logps_train/policy_2_w": -271.5279541015625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 0.9005199670791626, "rewards_train/1-l": -1.3165819644927979, "rewards_train/1-w": 3.0369443893432617, "rewards_train/2-2": 2.2070651054382324, "rewards_train/2-w": 1.122987985610962, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.35352635383606, "rewards_train/margins_1": 2.136424422264099, "rewards_train/margins_2": 1.0840771198272705, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -115.77996826171875, "logps_train/policy_1_l": -94.43343353271484, "logps_train/policy_1_w": -115.11420440673828, "logps_train/policy_2_2": -96.1612319946289, "logps_train/policy_2_w": -143.19723510742188, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.2126281261444092, "rewards_train/1-l": -1.0917811393737793, "rewards_train/1-w": 1.8549858331680298, "rewards_train/2-2": 1.8088767528533936, "rewards_train/2-w": 1.252152442932129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.946766972541809, "rewards_train/margins_1": 0.6423577070236206, "rewards_train/margins_2": 0.5567243099212646, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -214.44598388671875, "logps_train/policy_1_l": -155.78201293945312, "logps_train/policy_1_w": -133.71572875976562, "logps_train/policy_2_2": -178.43280029296875, "logps_train/policy_2_w": -163.31039428710938, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 2.191338062286377, "rewards_train/1-l": -0.8657013177871704, "rewards_train/1-w": 1.239950180053711, "rewards_train/2-2": 3.2473440170288086, "rewards_train/2-w": 0.5896626114845276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.1056514978408813, "rewards_train/margins_1": -0.951387882232666, "rewards_train/margins_2": 2.657681405544281, "step": 157 }, { "epoch": 0.47, "learning_rate": 4.545516298697006e-06, "loss": 0.7814, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -143.47317504882812, "logps_train/policy_1_l": -121.62115478515625, "logps_train/policy_1_w": -102.88471984863281, "logps_train/policy_2_2": -113.61530303955078, "logps_train/policy_2_w": -125.6769790649414, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.1300268173217773, "rewards_train/1-l": -1.2476630210876465, "rewards_train/1-w": 2.010747194290161, "rewards_train/2-2": 1.8757740259170532, "rewards_train/2-w": 1.595583438873291, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2584102153778076, "rewards_train/margins_1": 0.8807203769683838, "rewards_train/margins_2": 0.2801905870437622, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -196.22817993164062, "logps_train/policy_1_l": -158.30303955078125, "logps_train/policy_1_w": -104.41259002685547, "logps_train/policy_2_2": -151.856201171875, "logps_train/policy_2_w": -150.9050750732422, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.8271831274032593, "rewards_train/1-l": -2.0849905014038086, "rewards_train/1-w": 1.5079598426818848, "rewards_train/2-2": 2.0675039291381836, "rewards_train/2-w": 0.5298051834106445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5929503440856934, "rewards_train/margins_1": 0.6807767152786255, "rewards_train/margins_2": 1.537698745727539, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -191.39842224121094, "logps_train/policy_1_l": -137.7856903076172, "logps_train/policy_1_w": -116.57750701904297, "logps_train/policy_2_2": -148.31924438476562, "logps_train/policy_2_w": -154.625244140625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.0617198944091797, "rewards_train/1-l": -1.5190837383270264, "rewards_train/1-w": 1.3013430833816528, "rewards_train/2-2": 2.061825752258301, "rewards_train/2-w": 0.4398196339607239, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.820426821708679, "rewards_train/margins_1": 0.23962318897247314, "rewards_train/margins_2": 1.622006118297577, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -204.8656005859375, "logps_train/policy_1_l": -172.72323608398438, "logps_train/policy_1_w": -147.001708984375, "logps_train/policy_2_2": -189.271484375, "logps_train/policy_2_w": -168.38719177246094, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.2892203330993652, "rewards_train/1-l": -1.3926355838775635, "rewards_train/1-w": 1.3826427459716797, "rewards_train/2-2": 1.9001944065093994, "rewards_train/2-w": 1.0550309419631958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.775278329849243, "rewards_train/margins_1": 0.09342241287231445, "rewards_train/margins_2": 0.8451634645462036, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -110.56396484375, "logps_train/policy_1_l": -68.499267578125, "logps_train/policy_1_w": -95.00692749023438, "logps_train/policy_2_2": -90.66879272460938, "logps_train/policy_2_w": -115.20367431640625, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.9006344676017761, "rewards_train/1-l": -0.6830811500549316, "rewards_train/1-w": 1.8395419120788574, "rewards_train/2-2": 1.4897611141204834, "rewards_train/2-w": 1.1093199253082275, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.522623062133789, "rewards_train/margins_1": 0.9389074444770813, "rewards_train/margins_2": 0.38044118881225586, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -98.67486572265625, "logps_train/policy_1_l": -213.99301147460938, "logps_train/policy_1_w": -69.47371673583984, "logps_train/policy_2_2": -81.02255249023438, "logps_train/policy_2_w": -102.42967224121094, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 0.707513153553009, "rewards_train/1-l": -3.058675527572632, "rewards_train/1-w": 1.3776285648345947, "rewards_train/2-2": 1.2805575132369995, "rewards_train/2-w": 0.9265643954277039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.436304092407227, "rewards_train/margins_1": 0.6701154112815857, "rewards_train/margins_2": 0.35399311780929565, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -101.77594757080078, "logps_train/policy_1_l": -160.40249633789062, "logps_train/policy_1_w": -181.85211181640625, "logps_train/policy_2_2": -79.35830688476562, "logps_train/policy_2_w": -231.51296997070312, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 0.9317802786827087, "rewards_train/1-l": -1.7496252059936523, "rewards_train/1-w": 2.225727081298828, "rewards_train/2-2": 1.5430755615234375, "rewards_train/2-w": 0.46432673931121826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9753522872924805, "rewards_train/margins_1": 1.2939468026161194, "rewards_train/margins_2": 1.0787488222122192, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -115.2186508178711, "logps_train/policy_1_l": -94.34339904785156, "logps_train/policy_1_w": -94.01863098144531, "logps_train/policy_2_2": -93.85883331298828, "logps_train/policy_2_w": -119.52584838867188, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.228916883468628, "rewards_train/1-l": -1.1228160858154297, "rewards_train/1-w": 1.6973555088043213, "rewards_train/2-2": 1.8422420024871826, "rewards_train/2-w": 1.0013213157653809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.820171594619751, "rewards_train/margins_1": 0.46843862533569336, "rewards_train/margins_2": 0.8409206867218018, "step": 158 }, { "epoch": 0.48, "logps_train/policy_1_2": -208.21133422851562, "logps_train/policy_1_l": -196.9047088623047, "logps_train/policy_1_w": -143.50140380859375, "logps_train/policy_2_2": -163.349365234375, "logps_train/policy_2_w": -186.33273315429688, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6374598741531372, "rewards_train/1-l": -1.780314564704895, "rewards_train/1-w": 2.4225151538848877, "rewards_train/2-2": 2.75100040435791, "rewards_train/2-w": 1.0979766845703125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.202829718589783, "rewards_train/margins_1": 0.7850552797317505, "rewards_train/margins_2": 1.6530237197875977, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -349.1124267578125, "logps_train/policy_1_l": -240.13067626953125, "logps_train/policy_1_w": -233.79806518554688, "logps_train/policy_2_2": -276.8687744140625, "logps_train/policy_2_w": -299.89312744140625, "logps_train/ref_1_2": -362.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -314.0, "logps_train/ref_2_w": -314.0, "rewards_train/1-2": 1.3012568950653076, "rewards_train/1-l": -2.197443962097168, "rewards_train/1-w": 3.53894305229187, "rewards_train/2-2": 3.7131221294403076, "rewards_train/2-w": 1.39506196975708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.736387014389038, "rewards_train/margins_1": 2.2376861572265625, "rewards_train/margins_2": 2.3180601596832275, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -161.67608642578125, "logps_train/policy_1_l": -158.72894287109375, "logps_train/policy_1_w": -101.8630599975586, "logps_train/policy_2_2": -124.91056823730469, "logps_train/policy_2_w": -130.33065795898438, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.732390820980072, "rewards_train/1-l": -1.084221601486206, "rewards_train/1-w": 1.5673565864562988, "rewards_train/2-2": 1.8336501121520996, "rewards_train/2-w": 0.7559974193572998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.651578187942505, "rewards_train/margins_1": 0.8349657654762268, "rewards_train/margins_2": 1.0776526927947998, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -192.90682983398438, "logps_train/policy_1_l": -145.25155639648438, "logps_train/policy_1_w": -116.40397644042969, "logps_train/policy_2_2": -151.47366333007812, "logps_train/policy_2_w": -150.1139678955078, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.4671289324760437, "rewards_train/1-l": -1.0544512271881104, "rewards_train/1-w": 2.2976877689361572, "rewards_train/2-2": 1.5791971683502197, "rewards_train/2-w": 1.4112597703933716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3521389961242676, "rewards_train/margins_1": 1.8305588364601135, "rewards_train/margins_2": 0.16793739795684814, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -127.67505645751953, "logps_train/policy_1_l": -196.31544494628906, "logps_train/policy_1_w": -124.13192749023438, "logps_train/policy_2_2": -99.20797729492188, "logps_train/policy_2_w": -154.314697265625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3278069496154785, "rewards_train/1-l": -2.2825210094451904, "rewards_train/1-w": 2.288370132446289, "rewards_train/2-2": 1.9026401042938232, "rewards_train/2-w": 1.3529056310653687, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.5708911418914795, "rewards_train/margins_1": 0.9605631828308105, "rewards_train/margins_2": 0.5497344732284546, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -132.11038208007812, "logps_train/policy_1_l": -229.26327514648438, "logps_train/policy_1_w": -181.2276611328125, "logps_train/policy_2_2": -96.87163543701172, "logps_train/policy_2_w": -231.6727294921875, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 0.492869108915329, "rewards_train/1-l": -3.180819272994995, "rewards_train/1-w": 1.9088733196258545, "rewards_train/2-2": 1.0452580451965332, "rewards_train/2-w": 0.8378053903579712, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.08969259262085, "rewards_train/margins_1": 1.4160042107105255, "rewards_train/margins_2": 0.207452654838562, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -161.66001892089844, "logps_train/policy_1_l": -57.45143508911133, "logps_train/policy_1_w": -88.18672180175781, "logps_train/policy_2_2": -126.33362579345703, "logps_train/policy_2_w": -112.67890930175781, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -51.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.123060941696167, "rewards_train/1-l": -0.6535418033599854, "rewards_train/1-w": 1.2532025575637817, "rewards_train/2-2": 2.0767931938171387, "rewards_train/2-w": 0.5227347016334534, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.906744360923767, "rewards_train/margins_1": 0.13014161586761475, "rewards_train/margins_2": 1.5540584921836853, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -174.22003173828125, "logps_train/policy_1_l": -227.38442993164062, "logps_train/policy_1_w": -206.59515380859375, "logps_train/policy_2_2": -136.54769897460938, "logps_train/policy_2_w": -253.96829223632812, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 1.8412775993347168, "rewards_train/1-l": -1.5321924686431885, "rewards_train/1-w": 3.4303274154663086, "rewards_train/2-2": 2.924137592315674, "rewards_train/2-w": 1.768014669418335, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.962519884109497, "rewards_train/margins_1": 1.5890498161315918, "rewards_train/margins_2": 1.1561229228973389, "step": 159 }, { "epoch": 0.48, "learning_rate": 4.531217195014204e-06, "loss": 0.7502, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -191.15444946289062, "logps_train/policy_1_l": -158.45425415039062, "logps_train/policy_1_w": -152.42214965820312, "logps_train/policy_2_2": -164.05633544921875, "logps_train/policy_2_w": -193.6570281982422, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.4376791715621948, "rewards_train/1-l": -1.570425271987915, "rewards_train/1-w": 3.0171608924865723, "rewards_train/2-2": 2.1818652153015137, "rewards_train/2-w": 1.7280473709106445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.587586164474487, "rewards_train/margins_1": 1.5794817209243774, "rewards_train/margins_2": 0.45381784439086914, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -141.9601593017578, "logps_train/policy_1_l": -123.42173767089844, "logps_train/policy_1_w": -82.23968505859375, "logps_train/policy_2_2": -104.71379089355469, "logps_train/policy_2_w": -109.98804473876953, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.1961722373962402, "rewards_train/1-l": -1.8486194610595703, "rewards_train/1-w": 1.7018132209777832, "rewards_train/2-2": 2.427057981491089, "rewards_train/2-w": 1.3074452877044678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5504326820373535, "rewards_train/margins_1": 0.505640983581543, "rewards_train/margins_2": 1.119612693786621, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -137.999755859375, "logps_train/policy_1_l": -131.52455139160156, "logps_train/policy_1_w": -119.17648315429688, "logps_train/policy_2_2": -112.4541015625, "logps_train/policy_2_w": -144.409423828125, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.5172123312950134, "rewards_train/1-l": -1.2173962593078613, "rewards_train/1-w": 1.53000807762146, "rewards_train/2-2": 1.296776533126831, "rewards_train/2-w": 0.815208911895752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7474043369293213, "rewards_train/margins_1": 1.0127957463264465, "rewards_train/margins_2": 0.4815676212310791, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -160.7032012939453, "logps_train/policy_1_l": -104.40943145751953, "logps_train/policy_1_w": -60.4078369140625, "logps_train/policy_2_2": -128.15292358398438, "logps_train/policy_2_w": -81.38863372802734, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.7140546441078186, "rewards_train/1-l": -0.899146556854248, "rewards_train/1-w": 0.8455443382263184, "rewards_train/2-2": 1.6550202369689941, "rewards_train/2-w": 0.559573769569397, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7446908950805664, "rewards_train/margins_1": 0.13148969411849976, "rewards_train/margins_2": 1.0954464673995972, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -105.86343383789062, "logps_train/policy_1_l": -122.0117416381836, "logps_train/policy_1_w": -122.4052734375, "logps_train/policy_2_2": -79.89183044433594, "logps_train/policy_2_w": -151.185302734375, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.8011568188667297, "rewards_train/1-l": -1.7460960149765015, "rewards_train/1-w": 2.2778801918029785, "rewards_train/2-2": 1.1373789310455322, "rewards_train/2-w": 1.1438719034194946, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.02397620677948, "rewards_train/margins_1": 1.4767233729362488, "rewards_train/margins_2": -0.006492972373962402, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -166.16970825195312, "logps_train/policy_1_l": -174.40504455566406, "logps_train/policy_1_w": -165.52642822265625, "logps_train/policy_2_2": -131.0540771484375, "logps_train/policy_2_w": -205.62039184570312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.3306858539581299, "rewards_train/1-l": -0.7933363318443298, "rewards_train/1-w": 2.2692322731018066, "rewards_train/2-2": 2.086388349533081, "rewards_train/2-w": 0.9628626108169556, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0625686049461365, "rewards_train/margins_1": 0.9385464191436768, "rewards_train/margins_2": 1.1235257387161255, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -151.25279235839844, "logps_train/policy_1_l": -142.046875, "logps_train/policy_1_w": -133.50143432617188, "logps_train/policy_2_2": -128.08450317382812, "logps_train/policy_2_w": -163.66632080078125, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.914564847946167, "rewards_train/1-l": -1.9097660779953003, "rewards_train/1-w": 1.5686074495315552, "rewards_train/2-2": 1.5438928604125977, "rewards_train/2-w": 0.4771174192428589, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4783735275268555, "rewards_train/margins_1": 0.6540426015853882, "rewards_train/margins_2": 1.0667754411697388, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -148.26998901367188, "logps_train/policy_1_l": -236.11932373046875, "logps_train/policy_1_w": -228.03622436523438, "logps_train/policy_2_2": -125.4765625, "logps_train/policy_2_w": -273.8160400390625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -286.0, "rewards_train/1-2": 1.5327677726745605, "rewards_train/1-l": -1.7689629793167114, "rewards_train/1-w": 2.480752944946289, "rewards_train/2-2": 2.1845712661743164, "rewards_train/2-w": 1.2340203523635864, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2497159242630005, "rewards_train/margins_1": 0.9479851722717285, "rewards_train/margins_2": 0.95055091381073, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -167.31048583984375, "logps_train/policy_1_l": -203.48843383789062, "logps_train/policy_1_w": -181.94134521484375, "logps_train/policy_2_2": -137.15420532226562, "logps_train/policy_2_w": -218.55230712890625, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.7935607433319092, "rewards_train/1-l": -1.4879050254821777, "rewards_train/1-w": 2.9156320095062256, "rewards_train/2-2": 2.2025489807128906, "rewards_train/2-w": 2.0432076454162598, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.403537034988403, "rewards_train/margins_1": 1.1220712661743164, "rewards_train/margins_2": 0.15934133529663086, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -122.99217224121094, "logps_train/policy_1_l": -155.09994506835938, "logps_train/policy_1_w": -154.46237182617188, "logps_train/policy_2_2": -96.43220520019531, "logps_train/policy_2_w": -198.79518127441406, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.850782871246338, "rewards_train/1-l": -1.924057126045227, "rewards_train/1-w": 2.2397000789642334, "rewards_train/2-2": 2.394279956817627, "rewards_train/2-w": 1.3298569917678833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1637572050094604, "rewards_train/margins_1": 0.3889172077178955, "rewards_train/margins_2": 1.0644229650497437, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -142.27442932128906, "logps_train/policy_1_l": -210.95925903320312, "logps_train/policy_1_w": -118.45161437988281, "logps_train/policy_2_2": -107.28398895263672, "logps_train/policy_2_w": -157.42971801757812, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.4834948778152466, "rewards_train/1-l": -2.567410469055176, "rewards_train/1-w": 2.0970263481140137, "rewards_train/2-2": 2.2130074501037598, "rewards_train/2-w": 1.0132793188095093, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6644368171691895, "rewards_train/margins_1": 0.6135314702987671, "rewards_train/margins_2": 1.1997281312942505, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -161.4798583984375, "logps_train/policy_1_l": -166.8834228515625, "logps_train/policy_1_w": -128.96026611328125, "logps_train/policy_2_2": -135.6544189453125, "logps_train/policy_2_w": -159.78213500976562, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.1738896369934082, "rewards_train/1-l": -1.9961557388305664, "rewards_train/1-w": 2.043036699295044, "rewards_train/2-2": 1.9978387355804443, "rewards_train/2-w": 1.363193154335022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.03919243812561, "rewards_train/margins_1": 0.8691470623016357, "rewards_train/margins_2": 0.6346455812454224, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -130.66262817382812, "logps_train/policy_1_l": -183.42074584960938, "logps_train/policy_1_w": -198.41372680664062, "logps_train/policy_2_2": -114.15467834472656, "logps_train/policy_2_w": -233.0904541015625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.0962374210357666, "rewards_train/1-l": -1.4670746326446533, "rewards_train/1-w": 2.582063674926758, "rewards_train/2-2": 1.4970316886901855, "rewards_train/2-w": 1.4222064018249512, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.049138307571411, "rewards_train/margins_1": 1.4858262538909912, "rewards_train/margins_2": 0.07482528686523438, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -152.52810668945312, "logps_train/policy_1_l": -234.7044219970703, "logps_train/policy_1_w": -135.47348022460938, "logps_train/policy_2_2": -126.98597717285156, "logps_train/policy_2_w": -178.11090087890625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.4643762111663818, "rewards_train/1-l": -3.3548166751861572, "rewards_train/1-w": 2.632340908050537, "rewards_train/2-2": 2.057652473449707, "rewards_train/2-w": 1.457660436630249, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.987157583236694, "rewards_train/margins_1": 1.1679646968841553, "rewards_train/margins_2": 0.599992036819458, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -97.58586883544922, "logps_train/policy_1_l": -101.92383575439453, "logps_train/policy_1_w": -120.70619201660156, "logps_train/policy_2_2": -71.09487915039062, "logps_train/policy_2_w": -165.8370819091797, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.6976636052131653, "rewards_train/1-l": -1.1193374395370483, "rewards_train/1-w": 1.3965684175491333, "rewards_train/2-2": 1.2530114650726318, "rewards_train/2-w": 0.42176058888435364, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5159058570861816, "rewards_train/margins_1": 0.698904812335968, "rewards_train/margins_2": 0.8312508761882782, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -107.77633666992188, "logps_train/policy_1_l": -103.56620788574219, "logps_train/policy_1_w": -84.64126586914062, "logps_train/policy_2_2": -88.52186584472656, "logps_train/policy_2_w": -111.81803131103516, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.14624270796775818, "rewards_train/1-l": -1.9819390773773193, "rewards_train/1-w": 2.838998317718506, "rewards_train/2-2": 0.6549421548843384, "rewards_train/2-w": 2.2111659049987793, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 4.820937395095825, "rewards_train/margins_1": 2.6927556097507477, "rewards_train/margins_2": -1.556223750114441, "step": 161 }, { "epoch": 0.49, "learning_rate": 4.516719848113983e-06, "loss": 0.8265, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -139.67054748535156, "logps_train/policy_1_l": -101.17791748046875, "logps_train/policy_1_w": -90.90129089355469, "logps_train/policy_2_2": -118.29100036621094, "logps_train/policy_2_w": -108.5149154663086, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.6157574653625488, "rewards_train/1-l": -0.7761906385421753, "rewards_train/1-w": 1.041121482849121, "rewards_train/2-2": 2.0701193809509277, "rewards_train/2-w": 0.32194584608078003, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8173121213912964, "rewards_train/margins_1": -0.5746359825134277, "rewards_train/margins_2": 1.7481735348701477, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -169.9673309326172, "logps_train/policy_1_l": -200.23721313476562, "logps_train/policy_1_w": -125.117919921875, "logps_train/policy_2_2": -135.30226135253906, "logps_train/policy_2_w": -164.63510131835938, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.526705026626587, "rewards_train/1-l": -2.0280187129974365, "rewards_train/1-w": 2.055004835128784, "rewards_train/2-2": 2.2236809730529785, "rewards_train/2-w": 0.7790683507919312, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.083023548126221, "rewards_train/margins_1": 0.5282998085021973, "rewards_train/margins_2": 1.4446126222610474, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -127.32186889648438, "logps_train/policy_1_l": -108.89808654785156, "logps_train/policy_1_w": -137.05645751953125, "logps_train/policy_2_2": -99.13484954833984, "logps_train/policy_2_w": -172.74520874023438, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.9912506937980652, "rewards_train/1-l": -0.5710585117340088, "rewards_train/1-w": 3.1724801063537598, "rewards_train/2-2": 1.625577449798584, "rewards_train/2-w": 1.9473546743392944, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7435386180877686, "rewards_train/margins_1": 2.1812294125556946, "rewards_train/margins_2": -0.32177722454071045, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -130.6474609375, "logps_train/policy_1_l": -182.2490997314453, "logps_train/policy_1_w": -153.06495666503906, "logps_train/policy_2_2": -105.40294647216797, "logps_train/policy_2_w": -190.81956481933594, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.3977547883987427, "rewards_train/1-l": -1.637410044670105, "rewards_train/1-w": 1.8942848443984985, "rewards_train/2-2": 2.085681438446045, "rewards_train/2-w": 0.5147228837013245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5316948890686035, "rewards_train/margins_1": 0.49653005599975586, "rewards_train/margins_2": 1.5709585547447205, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -123.52947998046875, "logps_train/policy_1_l": -109.43841552734375, "logps_train/policy_1_w": -82.78053283691406, "logps_train/policy_2_2": -98.27754211425781, "logps_train/policy_2_w": -99.34219360351562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.475177526473999, "rewards_train/1-l": -1.0932559967041016, "rewards_train/1-w": 2.2442126274108887, "rewards_train/2-2": 2.219120740890503, "rewards_train/2-w": 1.9767180681228638, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3374686241149902, "rewards_train/margins_1": 0.7690351009368896, "rewards_train/margins_2": 0.24240267276763916, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -253.53707885742188, "logps_train/policy_1_l": -193.97311401367188, "logps_train/policy_1_w": -144.36148071289062, "logps_train/policy_2_2": -217.67294311523438, "logps_train/policy_2_w": -172.9308319091797, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -245.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.6783229112625122, "rewards_train/1-l": -1.3248493671417236, "rewards_train/1-w": 2.6693201065063477, "rewards_train/2-2": 2.7616117000579834, "rewards_train/2-w": 1.8600420951843262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9941694736480713, "rewards_train/margins_1": 0.9909971952438354, "rewards_train/margins_2": 0.9015696048736572, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -230.0696563720703, "logps_train/policy_1_l": -207.33126831054688, "logps_train/policy_1_w": -250.62106323242188, "logps_train/policy_2_2": -195.3774871826172, "logps_train/policy_2_w": -296.4826965332031, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -274.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -306.0, "rewards_train/1-2": 1.439909815788269, "rewards_train/1-l": -1.545626163482666, "rewards_train/1-w": 2.369534492492676, "rewards_train/2-2": 2.633345603942871, "rewards_train/2-w": 0.9232138395309448, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.915160655975342, "rewards_train/margins_1": 0.9296246767044067, "rewards_train/margins_2": 1.7101317644119263, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -190.27464294433594, "logps_train/policy_1_l": -309.32073974609375, "logps_train/policy_1_w": -185.05126953125, "logps_train/policy_2_2": -154.8269500732422, "logps_train/policy_2_w": -221.08242797851562, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.5912854671478271, "rewards_train/1-l": -2.950821876525879, "rewards_train/1-w": 2.5261223316192627, "rewards_train/2-2": 2.476679801940918, "rewards_train/2-w": 1.6480064392089844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.476944208145142, "rewards_train/margins_1": 0.9348368644714355, "rewards_train/margins_2": 0.8286733627319336, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -162.3765869140625, "logps_train/policy_1_l": -167.59254455566406, "logps_train/policy_1_w": -145.15032958984375, "logps_train/policy_2_2": -138.17295837402344, "logps_train/policy_2_w": -171.08218383789062, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.049060344696045, "rewards_train/1-l": -0.9948010444641113, "rewards_train/1-w": 1.652154564857483, "rewards_train/2-2": 2.464735507965088, "rewards_train/2-w": 0.6949052810668945, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6469556093215942, "rewards_train/margins_1": -0.396905779838562, "rewards_train/margins_2": 1.7698302268981934, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -135.3412322998047, "logps_train/policy_1_l": -148.43319702148438, "logps_train/policy_1_w": -130.45260620117188, "logps_train/policy_2_2": -111.26734161376953, "logps_train/policy_2_w": -155.91319274902344, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.8877516984939575, "rewards_train/1-l": -1.4460525512695312, "rewards_train/1-w": 1.6113803386688232, "rewards_train/2-2": 1.6732659339904785, "rewards_train/2-w": 0.8703997731208801, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0574328899383545, "rewards_train/margins_1": 0.7236286401748657, "rewards_train/margins_2": 0.8028661608695984, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -213.03594970703125, "logps_train/policy_1_l": -301.9696960449219, "logps_train/policy_1_w": -210.5604248046875, "logps_train/policy_2_2": -163.42616271972656, "logps_train/policy_2_w": -278.575927734375, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.808905839920044, "rewards_train/1-l": -2.8883769512176514, "rewards_train/1-w": 3.024425983428955, "rewards_train/2-2": 2.8058202266693115, "rewards_train/2-w": 1.067406177520752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.9128029346466064, "rewards_train/margins_1": 1.2155201435089111, "rewards_train/margins_2": 1.7384140491485596, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -236.49266052246094, "logps_train/policy_1_l": -197.76918029785156, "logps_train/policy_1_w": -183.46548461914062, "logps_train/policy_2_2": -203.2025146484375, "logps_train/policy_2_w": -231.2388153076172, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.3663595914840698, "rewards_train/1-l": -1.3973276615142822, "rewards_train/1-w": 3.165952205657959, "rewards_train/2-2": 2.6359975337982178, "rewards_train/2-w": 1.8792428970336914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.563279867172241, "rewards_train/margins_1": 1.7995926141738892, "rewards_train/margins_2": 0.7567546367645264, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -124.6507339477539, "logps_train/policy_1_l": -130.06597900390625, "logps_train/policy_1_w": -120.76102447509766, "logps_train/policy_2_2": -100.18290710449219, "logps_train/policy_2_w": -142.37001037597656, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.9755522012710571, "rewards_train/1-l": -0.950153112411499, "rewards_train/1-w": 1.6309292316436768, "rewards_train/2-2": 1.756709337234497, "rewards_train/2-w": 1.1817491054534912, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.581082344055176, "rewards_train/margins_1": 0.6553770303726196, "rewards_train/margins_2": 0.5749602317810059, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -211.8853302001953, "logps_train/policy_1_l": -198.6986083984375, "logps_train/policy_1_w": -121.66282653808594, "logps_train/policy_2_2": -170.85043334960938, "logps_train/policy_2_w": -150.12103271484375, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.9505292773246765, "rewards_train/1-l": -1.7589221000671387, "rewards_train/1-w": 2.2891862392425537, "rewards_train/2-2": 2.3532371520996094, "rewards_train/2-w": 1.277740240097046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.048108339309692, "rewards_train/margins_1": 1.3386569619178772, "rewards_train/margins_2": 1.0754969120025635, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -110.49152374267578, "logps_train/policy_1_l": -136.78472900390625, "logps_train/policy_1_w": -127.76699829101562, "logps_train/policy_2_2": -87.37134552001953, "logps_train/policy_2_w": -164.5373077392578, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.483660101890564, "rewards_train/1-l": -1.3858941793441772, "rewards_train/1-w": 2.4326748847961426, "rewards_train/2-2": 1.9861078262329102, "rewards_train/2-w": 0.8228323459625244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.81856906414032, "rewards_train/margins_1": 0.9490147829055786, "rewards_train/margins_2": 1.1632754802703857, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -129.86293029785156, "logps_train/policy_1_l": -157.39962768554688, "logps_train/policy_1_w": -127.97937774658203, "logps_train/policy_2_2": -104.6156997680664, "logps_train/policy_2_w": -164.1220703125, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.4394888877868652, "rewards_train/1-l": -1.4639865159988403, "rewards_train/1-w": 2.09893798828125, "rewards_train/2-2": 2.1935079097747803, "rewards_train/2-w": 1.0088863372802734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5629245042800903, "rewards_train/margins_1": 0.6594491004943848, "rewards_train/margins_2": 1.1846215724945068, "step": 163 }, { "epoch": 0.49, "learning_rate": 4.502025672911845e-06, "loss": 0.7535, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -183.16995239257812, "logps_train/policy_1_l": -157.38912963867188, "logps_train/policy_1_w": -143.30453491210938, "logps_train/policy_2_2": -150.23910522460938, "logps_train/policy_2_w": -179.54505920410156, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.5564428567886353, "rewards_train/1-l": -1.4496551752090454, "rewards_train/1-w": 2.5289206504821777, "rewards_train/2-2": 2.4184725284576416, "rewards_train/2-w": 1.426743745803833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.978575825691223, "rewards_train/margins_1": 0.9724777936935425, "rewards_train/margins_2": 0.9917287826538086, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -62.31389617919922, "logps_train/policy_1_l": -106.93222045898438, "logps_train/policy_1_w": -51.133216857910156, "logps_train/policy_2_2": -51.273685455322266, "logps_train/policy_2_w": -60.650474548339844, "logps_train/ref_1_2": -68.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -60.25, "logps_train/ref_2_2": -58.5, "logps_train/ref_2_w": -69.0, "rewards_train/1-2": 0.6010318994522095, "rewards_train/1-l": -0.7999601364135742, "rewards_train/1-w": 0.9023030400276184, "rewards_train/2-2": 0.7347410321235657, "rewards_train/2-w": 0.8029212951660156, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7022631764411926, "rewards_train/margins_1": 0.30127114057540894, "rewards_train/margins_2": -0.06818026304244995, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -115.79600524902344, "logps_train/policy_1_l": -222.78518676757812, "logps_train/policy_1_w": -120.64537811279297, "logps_train/policy_2_2": -85.86698913574219, "logps_train/policy_2_w": -151.34442138671875, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.2200090885162354, "rewards_train/1-l": -1.472415804862976, "rewards_train/1-w": 2.298743963241577, "rewards_train/2-2": 2.0261919498443604, "rewards_train/2-w": 1.3280582427978516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7711597681045532, "rewards_train/margins_1": 1.0787348747253418, "rewards_train/margins_2": 0.6981337070465088, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -126.88296508789062, "logps_train/policy_1_l": -153.49600219726562, "logps_train/policy_1_w": -121.68273162841797, "logps_train/policy_2_2": -112.13279724121094, "logps_train/policy_2_w": -141.84835815429688, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.783578097820282, "rewards_train/1-l": -0.81522536277771, "rewards_train/1-w": 1.6051639318466187, "rewards_train/2-2": 1.2960947751998901, "rewards_train/2-w": 1.0725865364074707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4203892946243286, "rewards_train/margins_1": 0.8215858340263367, "rewards_train/margins_2": 0.22350823879241943, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -264.6852111816406, "logps_train/policy_1_l": -232.73062133789062, "logps_train/policy_1_w": -173.21636962890625, "logps_train/policy_2_2": -218.4561309814453, "logps_train/policy_2_w": -216.16152954101562, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.7596038579940796, "rewards_train/1-l": -2.400991916656494, "rewards_train/1-w": 2.6010184288024902, "rewards_train/2-2": 2.4239182472229004, "rewards_train/2-w": 0.8221280574798584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.002010345458984, "rewards_train/margins_1": 1.8414145708084106, "rewards_train/margins_2": 1.601790189743042, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -160.49822998046875, "logps_train/policy_1_l": -171.98838806152344, "logps_train/policy_1_w": -145.55259704589844, "logps_train/policy_2_2": -129.08041381835938, "logps_train/policy_2_w": -193.17599487304688, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.2009575366973877, "rewards_train/1-l": -1.3582136631011963, "rewards_train/1-w": 2.354701042175293, "rewards_train/2-2": 2.274575710296631, "rewards_train/2-w": 1.4485127925872803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7129147052764893, "rewards_train/margins_1": 1.1537435054779053, "rewards_train/margins_2": 0.8260629177093506, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -126.19285583496094, "logps_train/policy_1_l": -138.08877563476562, "logps_train/policy_1_w": -109.56681823730469, "logps_train/policy_2_2": -105.15335083007812, "logps_train/policy_2_w": -146.81814575195312, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.758058488368988, "rewards_train/1-l": -1.9430568218231201, "rewards_train/1-w": 2.160506010055542, "rewards_train/2-2": 1.085055947303772, "rewards_train/2-w": 1.0103721618652344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.103562831878662, "rewards_train/margins_1": 1.402447521686554, "rewards_train/margins_2": 0.0746837854385376, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -177.27703857421875, "logps_train/policy_1_l": -129.22006225585938, "logps_train/policy_1_w": -88.23619079589844, "logps_train/policy_2_2": -136.64527893066406, "logps_train/policy_2_w": -110.03170776367188, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.9941706657409668, "rewards_train/1-l": -0.7579439878463745, "rewards_train/1-w": 1.2107555866241455, "rewards_train/2-2": 2.5354723930358887, "rewards_train/2-w": 0.8702666759490967, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.96869957447052, "rewards_train/margins_1": 0.2165849208831787, "rewards_train/margins_2": 1.665205717086792, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -227.2524871826172, "logps_train/policy_1_l": -180.8282470703125, "logps_train/policy_1_w": -127.46926879882812, "logps_train/policy_2_2": -174.95603942871094, "logps_train/policy_2_w": -164.66583251953125, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.1134226322174072, "rewards_train/1-l": -1.8512828350067139, "rewards_train/1-w": 2.8167452812194824, "rewards_train/2-2": 2.906299591064453, "rewards_train/2-w": 2.0009453296661377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.668028116226196, "rewards_train/margins_1": 1.7033226490020752, "rewards_train/margins_2": 0.9053542613983154, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -231.047119140625, "logps_train/policy_1_l": -199.97500610351562, "logps_train/policy_1_w": -165.3864288330078, "logps_train/policy_2_2": -197.72845458984375, "logps_train/policy_2_w": -218.7073211669922, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": 1.0546635389328003, "rewards_train/1-l": -1.3119524717330933, "rewards_train/1-w": 2.0285446643829346, "rewards_train/2-2": 2.492779016494751, "rewards_train/2-w": 1.3714566230773926, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.340497136116028, "rewards_train/margins_1": 0.9738811254501343, "rewards_train/margins_2": 1.1213223934173584, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -101.92068481445312, "logps_train/policy_1_l": -95.8331069946289, "logps_train/policy_1_w": -88.0517578125, "logps_train/policy_2_2": -76.56013488769531, "logps_train/policy_2_w": -120.29474639892578, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.4266812801361084, "rewards_train/1-l": -1.4805760383605957, "rewards_train/1-w": 1.730762004852295, "rewards_train/2-2": 1.9299235343933105, "rewards_train/2-w": 0.28771233558654785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2113380432128906, "rewards_train/margins_1": 0.3040807247161865, "rewards_train/margins_2": 1.6422111988067627, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -134.6181640625, "logps_train/policy_1_l": -72.04940795898438, "logps_train/policy_1_w": -104.3311538696289, "logps_train/policy_2_2": -117.55142974853516, "logps_train/policy_2_w": -117.83831787109375, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.4522455930709839, "rewards_train/1-l": -0.39371082186698914, "rewards_train/1-w": 1.7623919248580933, "rewards_train/2-2": 1.619856595993042, "rewards_train/2-w": 1.3880436420440674, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1561027467250824, "rewards_train/margins_1": 0.3101463317871094, "rewards_train/margins_2": 0.2318129539489746, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -190.75245666503906, "logps_train/policy_1_l": -149.60516357421875, "logps_train/policy_1_w": -139.72760009765625, "logps_train/policy_2_2": -155.46421813964844, "logps_train/policy_2_w": -170.42282104492188, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.054441213607788, "rewards_train/1-l": -1.3780955076217651, "rewards_train/1-w": 3.2428650856018066, "rewards_train/2-2": 3.330141544342041, "rewards_train/2-w": 2.3202173709869385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.620960593223572, "rewards_train/margins_1": 1.1884238719940186, "rewards_train/margins_2": 1.0099241733551025, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -184.20237731933594, "logps_train/policy_1_l": -241.71673583984375, "logps_train/policy_1_w": -237.2322998046875, "logps_train/policy_2_2": -142.55438232421875, "logps_train/policy_2_w": -275.001220703125, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -296.0, "rewards_train/1-2": 1.1938250064849854, "rewards_train/1-l": -2.0482349395751953, "rewards_train/1-w": 2.733020544052124, "rewards_train/2-2": 2.6211252212524414, "rewards_train/2-w": 2.0311291217803955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.781255483627319, "rewards_train/margins_1": 1.5391955375671387, "rewards_train/margins_2": 0.5899960994720459, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -164.21302795410156, "logps_train/policy_1_l": -188.5447235107422, "logps_train/policy_1_w": -80.33338165283203, "logps_train/policy_2_2": -126.4244384765625, "logps_train/policy_2_w": -106.51174926757812, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.6146352291107178, "rewards_train/1-l": -2.2283003330230713, "rewards_train/1-w": 1.7307238578796387, "rewards_train/2-2": 2.9856808185577393, "rewards_train/2-w": 1.0355433225631714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.95902419090271, "rewards_train/margins_1": 0.1160886287689209, "rewards_train/margins_2": 1.9501374959945679, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -121.53279876708984, "logps_train/policy_1_l": -118.91963958740234, "logps_train/policy_1_w": -97.33384704589844, "logps_train/policy_2_2": -98.02024841308594, "logps_train/policy_2_w": -127.88270568847656, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.1310949325561523, "rewards_train/1-l": -1.0880571603775024, "rewards_train/1-w": 1.3482553958892822, "rewards_train/2-2": 1.7136001586914062, "rewards_train/2-w": 0.7625105381011963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4363125562667847, "rewards_train/margins_1": 0.21716046333312988, "rewards_train/margins_2": 0.95108962059021, "step": 165 }, { "epoch": 0.5, "learning_rate": 4.487136103533383e-06, "loss": 0.7607, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -117.84010314941406, "logps_train/policy_1_l": -122.04762268066406, "logps_train/policy_1_w": -90.12135314941406, "logps_train/policy_2_2": -99.15731811523438, "logps_train/policy_2_w": -101.03959655761719, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 1.237473726272583, "rewards_train/1-l": -0.8610127568244934, "rewards_train/1-w": 1.0761466026306152, "rewards_train/2-2": 1.6455965042114258, "rewards_train/2-w": 0.667134165763855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9371593594551086, "rewards_train/margins_1": -0.16132712364196777, "rewards_train/margins_2": 0.9784623384475708, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -224.36862182617188, "logps_train/policy_1_l": -140.21240234375, "logps_train/policy_1_w": -138.36883544921875, "logps_train/policy_2_2": -178.2351531982422, "logps_train/policy_2_w": -182.2333984375, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.4287608861923218, "rewards_train/1-l": -0.7036623954772949, "rewards_train/1-w": 2.545929431915283, "rewards_train/2-2": 2.8339061737060547, "rewards_train/2-w": 1.0657224655151367, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.249591827392578, "rewards_train/margins_1": 1.1171685457229614, "rewards_train/margins_2": 1.768183708190918, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -167.58999633789062, "logps_train/policy_1_l": -126.7954330444336, "logps_train/policy_1_w": -119.24290466308594, "logps_train/policy_2_2": -145.5903778076172, "logps_train/policy_2_w": -149.57907104492188, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.160630226135254, "rewards_train/1-l": -1.0009299516677856, "rewards_train/1-w": 2.51242733001709, "rewards_train/2-2": 2.0354440212249756, "rewards_train/2-w": 1.7749062776565552, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5133572816848755, "rewards_train/margins_1": 1.351797103881836, "rewards_train/margins_2": 0.2605377435684204, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -225.8457794189453, "logps_train/policy_1_l": -217.97137451171875, "logps_train/policy_1_w": -123.7321548461914, "logps_train/policy_2_2": -185.49649047851562, "logps_train/policy_2_w": -156.69126892089844, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.5935484170913696, "rewards_train/1-l": -1.309638261795044, "rewards_train/1-w": 2.209597110748291, "rewards_train/2-2": 1.9034755229949951, "rewards_train/2-w": 1.540248155593872, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.519235372543335, "rewards_train/margins_1": 1.6160486936569214, "rewards_train/margins_2": 0.36322736740112305, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -100.12461853027344, "logps_train/policy_1_l": -151.42510986328125, "logps_train/policy_1_w": -125.48590087890625, "logps_train/policy_2_2": -82.42713928222656, "logps_train/policy_2_w": -152.04664611816406, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.9203506708145142, "rewards_train/1-l": -1.9582334756851196, "rewards_train/1-w": 3.023284912109375, "rewards_train/2-2": 2.2611923217773438, "rewards_train/2-w": 2.3039298057556152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.981518387794495, "rewards_train/margins_1": 1.1029342412948608, "rewards_train/margins_2": -0.042737483978271484, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -112.08193969726562, "logps_train/policy_1_l": -111.81523895263672, "logps_train/policy_1_w": -97.75318908691406, "logps_train/policy_2_2": -87.2403564453125, "logps_train/policy_2_w": -133.9237823486328, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.090244174003601, "rewards_train/1-l": -1.2137503623962402, "rewards_train/1-w": 2.333275556564331, "rewards_train/2-2": 1.586120367050171, "rewards_train/2-w": 1.3201212882995605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.5470259189605713, "rewards_train/margins_1": 1.24303138256073, "rewards_train/margins_2": 0.26599907875061035, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -102.21730041503906, "logps_train/policy_1_l": -113.12982940673828, "logps_train/policy_1_w": -78.61305236816406, "logps_train/policy_2_2": -86.95197296142578, "logps_train/policy_2_w": -97.7044677734375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -103.5, "rewards_train/1-2": 0.9337381720542908, "rewards_train/1-l": -0.16415446996688843, "rewards_train/1-w": 0.9933828115463257, "rewards_train/2-2": 1.3633965253829956, "rewards_train/2-w": 0.5721315145492554, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.1575372815132141, "rewards_train/margins_1": 0.05964463949203491, "rewards_train/margins_2": 0.7912650108337402, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -217.73291015625, "logps_train/policy_1_l": -203.623046875, "logps_train/policy_1_w": -185.1527099609375, "logps_train/policy_2_2": -182.309326171875, "logps_train/policy_2_w": -221.33584594726562, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.1345226764678955, "rewards_train/1-l": -1.8310558795928955, "rewards_train/1-w": 2.101135492324829, "rewards_train/2-2": 2.1268794536590576, "rewards_train/2-w": 1.3007899522781372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9321913719177246, "rewards_train/margins_1": 0.9666128158569336, "rewards_train/margins_2": 0.8260895013809204, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -149.6664581298828, "logps_train/policy_1_l": -163.58578491210938, "logps_train/policy_1_w": -169.82821655273438, "logps_train/policy_2_2": -112.97044372558594, "logps_train/policy_2_w": -224.47921752929688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 0.2601122260093689, "rewards_train/1-l": -0.6952974796295166, "rewards_train/1-w": 3.7234268188476562, "rewards_train/2-2": 1.3250257968902588, "rewards_train/2-w": 2.2802038192749023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.418724298477173, "rewards_train/margins_1": 3.4633145928382874, "rewards_train/margins_2": -0.9551780223846436, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -127.64739227294922, "logps_train/policy_1_l": -106.10204315185547, "logps_train/policy_1_w": -47.27874755859375, "logps_train/policy_2_2": -97.61341094970703, "logps_train/policy_2_w": -71.85759735107422, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -57.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 1.0149483680725098, "rewards_train/1-l": -1.0039546489715576, "rewards_train/1-w": 0.9588444232940674, "rewards_train/2-2": 1.8230340480804443, "rewards_train/2-w": 0.6736153960227966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.962799072265625, "rewards_train/margins_1": -0.05610394477844238, "rewards_train/margins_2": 1.1494186520576477, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -199.19813537597656, "logps_train/policy_1_l": -163.59536743164062, "logps_train/policy_1_w": -162.3787841796875, "logps_train/policy_2_2": -154.9951629638672, "logps_train/policy_2_w": -209.14895629882812, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.01924729347229, "rewards_train/1-l": -1.429067611694336, "rewards_train/1-w": 1.4199349880218506, "rewards_train/2-2": 2.0821239948272705, "rewards_train/2-w": 0.6476031541824341, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8490025997161865, "rewards_train/margins_1": 0.40068769454956055, "rewards_train/margins_2": 1.4345208406448364, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -100.94305419921875, "logps_train/policy_1_l": -141.97763061523438, "logps_train/policy_1_w": -146.0006866455078, "logps_train/policy_2_2": -80.18397521972656, "logps_train/policy_2_w": -184.05807495117188, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.26975712180137634, "rewards_train/1-l": -1.3133869171142578, "rewards_train/1-w": 2.342118978500366, "rewards_train/2-2": 1.048008680343628, "rewards_train/2-w": 0.8332551717758179, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.655505895614624, "rewards_train/margins_1": 2.07236185669899, "rewards_train/margins_2": 0.21475350856781006, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -160.21658325195312, "logps_train/policy_1_l": -147.3282928466797, "logps_train/policy_1_w": -119.0666275024414, "logps_train/policy_2_2": -141.5231475830078, "logps_train/policy_2_w": -147.73245239257812, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.9642800092697144, "rewards_train/1-l": -1.081852912902832, "rewards_train/1-w": 1.8159935474395752, "rewards_train/2-2": 1.499247670173645, "rewards_train/2-w": 1.3111305236816406, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8978464603424072, "rewards_train/margins_1": 0.8517135381698608, "rewards_train/margins_2": 0.1881171464920044, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -160.75479125976562, "logps_train/policy_1_l": -245.7276611328125, "logps_train/policy_1_w": -152.00750732421875, "logps_train/policy_2_2": -133.34457397460938, "logps_train/policy_2_w": -187.92193603515625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.2885825634002686, "rewards_train/1-l": -2.817883253097534, "rewards_train/1-w": 1.8433892726898193, "rewards_train/2-2": 1.9155430793762207, "rewards_train/2-w": 0.8671814203262329, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6612725257873535, "rewards_train/margins_1": 0.5548067092895508, "rewards_train/margins_2": 1.0483616590499878, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -114.37226867675781, "logps_train/policy_1_l": -118.48102569580078, "logps_train/policy_1_w": -93.00523376464844, "logps_train/policy_2_2": -84.29945373535156, "logps_train/policy_2_w": -121.25553894042969, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.3405070304870605, "rewards_train/1-l": -1.0439038276672363, "rewards_train/1-w": 1.5697896480560303, "rewards_train/2-2": 2.0827503204345703, "rewards_train/2-w": 0.983820915222168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6136934757232666, "rewards_train/margins_1": 0.22928261756896973, "rewards_train/margins_2": 1.0989294052124023, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -208.81936645507812, "logps_train/policy_1_l": -157.27609252929688, "logps_train/policy_1_w": -109.94551086425781, "logps_train/policy_2_2": -167.44102478027344, "logps_train/policy_2_w": -140.75140380859375, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.6071252822875977, "rewards_train/1-l": -1.1037813425064087, "rewards_train/1-w": 1.9116994142532349, "rewards_train/2-2": 2.4887099266052246, "rewards_train/2-w": 1.24790620803833, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0154807567596436, "rewards_train/margins_1": 0.3045741319656372, "rewards_train/margins_2": 1.2408037185668945, "step": 167 }, { "epoch": 0.5, "learning_rate": 4.472052593174323e-06, "loss": 0.8758, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -139.6101531982422, "logps_train/policy_1_l": -155.200439453125, "logps_train/policy_1_w": -156.06332397460938, "logps_train/policy_2_2": -115.13961791992188, "logps_train/policy_2_w": -185.64971923828125, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 0.33429771661758423, "rewards_train/1-l": -0.8739500045776367, "rewards_train/1-w": 1.1803866624832153, "rewards_train/2-2": 0.9805694818496704, "rewards_train/2-w": 0.7967471480369568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.054336667060852, "rewards_train/margins_1": 0.8460889458656311, "rewards_train/margins_2": 0.18382233381271362, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -183.3727264404297, "logps_train/policy_1_l": -240.26686096191406, "logps_train/policy_1_w": -241.45619201660156, "logps_train/policy_2_2": -146.00625610351562, "logps_train/policy_2_w": -303.0250244140625, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 2.575228214263916, "rewards_train/1-l": -1.7548103332519531, "rewards_train/1-w": 2.685631513595581, "rewards_train/2-2": 3.043124198913574, "rewards_train/2-w": 0.4099985361099243, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.440441846847534, "rewards_train/margins_1": 0.11040329933166504, "rewards_train/margins_2": 2.63312566280365, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -174.65182495117188, "logps_train/policy_1_l": -249.0955047607422, "logps_train/policy_1_w": -185.3482666015625, "logps_train/policy_2_2": -140.79293823242188, "logps_train/policy_2_w": -224.61431884765625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 0.5363800525665283, "rewards_train/1-l": -2.0282998085021973, "rewards_train/1-w": 2.91439151763916, "rewards_train/2-2": 1.632424235343933, "rewards_train/2-w": 2.02138090133667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.942691326141357, "rewards_train/margins_1": 2.378011465072632, "rewards_train/margins_2": -0.3889566659927368, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -227.43515014648438, "logps_train/policy_1_l": -294.33941650390625, "logps_train/policy_1_w": -195.0693359375, "logps_train/policy_2_2": -191.3872833251953, "logps_train/policy_2_w": -242.5418243408203, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 2.301798105239868, "rewards_train/1-l": -2.2690000534057617, "rewards_train/1-w": 2.9258790016174316, "rewards_train/2-2": 3.515958309173584, "rewards_train/2-w": 1.5270683765411377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.194879055023193, "rewards_train/margins_1": 0.6240808963775635, "rewards_train/margins_2": 1.9888899326324463, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -215.53848266601562, "logps_train/policy_1_l": -186.23873901367188, "logps_train/policy_1_w": -169.5390167236328, "logps_train/policy_2_2": -186.94064331054688, "logps_train/policy_2_w": -196.79550170898438, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.639900803565979, "rewards_train/1-l": -1.021140217781067, "rewards_train/1-w": 3.464848279953003, "rewards_train/2-2": 2.440310001373291, "rewards_train/2-w": 2.570448875427246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 4.48598849773407, "rewards_train/margins_1": 1.824947476387024, "rewards_train/margins_2": -0.13013887405395508, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -185.11131286621094, "logps_train/policy_1_l": -124.3164291381836, "logps_train/policy_1_w": -128.76583862304688, "logps_train/policy_2_2": -150.79681396484375, "logps_train/policy_2_w": -168.06423950195312, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.166993260383606, "rewards_train/1-l": -1.4662129878997803, "rewards_train/1-w": 2.4577903747558594, "rewards_train/2-2": 2.2671937942504883, "rewards_train/2-w": 1.4810760021209717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9240033626556396, "rewards_train/margins_1": 1.2907971143722534, "rewards_train/margins_2": 0.7861177921295166, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -207.05184936523438, "logps_train/policy_1_l": -135.8203125, "logps_train/policy_1_w": -140.3677215576172, "logps_train/policy_2_2": -176.81707763671875, "logps_train/policy_2_w": -171.28768920898438, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.101064920425415, "rewards_train/1-l": -0.6203138828277588, "rewards_train/1-w": 2.2597124576568604, "rewards_train/2-2": 2.9663400650024414, "rewards_train/2-w": 1.2267000675201416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.880026340484619, "rewards_train/margins_1": 0.1586475372314453, "rewards_train/margins_2": 1.7396399974822998, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -119.9544677734375, "logps_train/policy_1_l": -116.6141357421875, "logps_train/policy_1_w": -114.68594360351562, "logps_train/policy_2_2": -102.57951354980469, "logps_train/policy_2_w": -146.1304931640625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.728381633758545, "rewards_train/1-l": -1.2603156566619873, "rewards_train/1-w": 2.2778897285461426, "rewards_train/2-2": 2.396346092224121, "rewards_train/2-w": 1.044373869895935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.53820538520813, "rewards_train/margins_1": 0.5495080947875977, "rewards_train/margins_2": 1.351972222328186, "step": 168 }, { "epoch": 0.51, "logps_train/policy_1_2": -196.7717742919922, "logps_train/policy_1_l": -182.31649780273438, "logps_train/policy_1_w": -142.28158569335938, "logps_train/policy_2_2": -166.26812744140625, "logps_train/policy_2_w": -174.53549194335938, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.5134472846984863, "rewards_train/1-l": -1.9789149761199951, "rewards_train/1-w": 2.473794937133789, "rewards_train/2-2": 2.4274845123291016, "rewards_train/2-w": 1.4503566026687622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.452709913253784, "rewards_train/margins_1": 0.9603476524353027, "rewards_train/margins_2": 0.9771279096603394, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -165.3514404296875, "logps_train/policy_1_l": -140.6087188720703, "logps_train/policy_1_w": -100.34013366699219, "logps_train/policy_2_2": -130.5809326171875, "logps_train/policy_2_w": -124.6551513671875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.3414175510406494, "rewards_train/1-l": -1.0835282802581787, "rewards_train/1-w": 2.4409866333007812, "rewards_train/2-2": 2.403625011444092, "rewards_train/2-w": 1.8907349109649658, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.52451491355896, "rewards_train/margins_1": 1.0995690822601318, "rewards_train/margins_2": 0.512890100479126, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -278.6710510253906, "logps_train/policy_1_l": -192.36170959472656, "logps_train/policy_1_w": -159.58740234375, "logps_train/policy_2_2": -233.51242065429688, "logps_train/policy_2_w": -193.60238647460938, "logps_train/ref_1_2": -300.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -270.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.0203945636749268, "rewards_train/1-l": -2.079920530319214, "rewards_train/1-w": 3.0475106239318848, "rewards_train/2-2": 3.711259365081787, "rewards_train/2-w": 1.908511757850647, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.127431154251099, "rewards_train/margins_1": 1.027116060256958, "rewards_train/margins_2": 1.8027476072311401, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -102.77445983886719, "logps_train/policy_1_l": -85.36908721923828, "logps_train/policy_1_w": -70.74977111816406, "logps_train/policy_2_2": -80.361572265625, "logps_train/policy_2_w": -91.428466796875, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.1467721462249756, "rewards_train/1-l": -0.31151819229125977, "rewards_train/1-w": 1.2738511562347412, "rewards_train/2-2": 1.6427488327026367, "rewards_train/2-w": 0.7981696128845215, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.585369348526001, "rewards_train/margins_1": 0.12707901000976562, "rewards_train/margins_2": 0.8445792198181152, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -155.42221069335938, "logps_train/policy_1_l": -211.8119659423828, "logps_train/policy_1_w": -129.0806884765625, "logps_train/policy_2_2": -136.29852294921875, "logps_train/policy_2_w": -146.29034423828125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.5083650350570679, "rewards_train/1-l": -1.3136175870895386, "rewards_train/1-w": 1.5081427097320557, "rewards_train/2-2": 1.9367485046386719, "rewards_train/2-w": 1.1621768474578857, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8217602968215942, "rewards_train/margins_1": -0.00022232532501220703, "rewards_train/margins_2": 0.7745716571807861, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -216.59075927734375, "logps_train/policy_1_l": -223.69119262695312, "logps_train/policy_1_w": -167.51724243164062, "logps_train/policy_2_2": -176.1729278564453, "logps_train/policy_2_w": -206.1782684326172, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.6268612146377563, "rewards_train/1-l": -1.5183380842208862, "rewards_train/1-w": 3.4404640197753906, "rewards_train/2-2": 3.206925630569458, "rewards_train/2-w": 1.9345176219940186, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.958802103996277, "rewards_train/margins_1": 1.8136028051376343, "rewards_train/margins_2": 1.2724080085754395, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -162.84226989746094, "logps_train/policy_1_l": -147.67758178710938, "logps_train/policy_1_w": -147.9197235107422, "logps_train/policy_2_2": -144.1251220703125, "logps_train/policy_2_w": -184.63385009765625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.0642104148864746, "rewards_train/1-l": -0.9759610891342163, "rewards_train/1-w": 1.9566597938537598, "rewards_train/2-2": 1.4740105867385864, "rewards_train/2-w": 0.8998959064483643, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.932620882987976, "rewards_train/margins_1": 0.8924493789672852, "rewards_train/margins_2": 0.5741146802902222, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -214.02798461914062, "logps_train/policy_1_l": -184.46493530273438, "logps_train/policy_1_w": -208.0313720703125, "logps_train/policy_2_2": -172.1001739501953, "logps_train/policy_2_w": -257.94281005859375, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 1.450716495513916, "rewards_train/1-l": -1.4777427911758423, "rewards_train/1-w": 3.001549243927002, "rewards_train/2-2": 2.7106857299804688, "rewards_train/2-w": 1.5754460096359253, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.479292035102844, "rewards_train/margins_1": 1.550832748413086, "rewards_train/margins_2": 1.1352397203445435, "step": 169 }, { "epoch": 0.51, "learning_rate": 4.456776613958683e-06, "loss": 0.8046, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -114.15174865722656, "logps_train/policy_1_l": -81.63216400146484, "logps_train/policy_1_w": -110.48743438720703, "logps_train/policy_2_2": -92.96821594238281, "logps_train/policy_2_w": -136.84158325195312, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -74.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.867638349533081, "rewards_train/1-l": -0.7528645992279053, "rewards_train/1-w": 1.3887569904327393, "rewards_train/2-2": 1.4340380430221558, "rewards_train/2-w": 0.7834197282791138, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1416215896606445, "rewards_train/margins_1": 0.5211186408996582, "rewards_train/margins_2": 0.650618314743042, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -167.51235961914062, "logps_train/policy_1_l": -181.54888916015625, "logps_train/policy_1_w": -124.46441650390625, "logps_train/policy_2_2": -122.00686645507812, "logps_train/policy_2_w": -157.23300170898438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.0972011089324951, "rewards_train/1-l": -1.6763733625411987, "rewards_train/1-w": 1.9969185590744019, "rewards_train/2-2": 2.482125759124756, "rewards_train/2-w": 1.2806055545806885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6732919216156006, "rewards_train/margins_1": 0.8997174501419067, "rewards_train/margins_2": 1.2015202045440674, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -202.27999877929688, "logps_train/policy_1_l": -211.19732666015625, "logps_train/policy_1_w": -131.81105041503906, "logps_train/policy_2_2": -157.47076416015625, "logps_train/policy_2_w": -174.69805908203125, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.5094987154006958, "rewards_train/1-l": -2.7166073322296143, "rewards_train/1-w": 2.5858867168426514, "rewards_train/2-2": 2.6154239177703857, "rewards_train/2-w": 1.602654218673706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.302494049072266, "rewards_train/margins_1": 1.0763880014419556, "rewards_train/margins_2": 1.0127696990966797, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -111.39961242675781, "logps_train/policy_1_l": -99.16331481933594, "logps_train/policy_1_w": -113.39690399169922, "logps_train/policy_2_2": -87.10746002197266, "logps_train/policy_2_w": -146.43130493164062, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.5256636142730713, "rewards_train/1-l": -0.6780500411987305, "rewards_train/1-w": 2.263434410095215, "rewards_train/2-2": 0.9564415216445923, "rewards_train/2-w": 1.328743577003479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.9414844512939453, "rewards_train/margins_1": 1.7377707958221436, "rewards_train/margins_2": -0.3723020553588867, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -199.65028381347656, "logps_train/policy_1_l": -138.84423828125, "logps_train/policy_1_w": -145.10971069335938, "logps_train/policy_2_2": -152.27459716796875, "logps_train/policy_2_w": -189.40853881835938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.5912216901779175, "rewards_train/1-l": -0.6539561152458191, "rewards_train/1-w": 2.2905919551849365, "rewards_train/2-2": 1.8725390434265137, "rewards_train/2-w": 1.0357087850570679, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9445480704307556, "rewards_train/margins_1": 1.699370265007019, "rewards_train/margins_2": 0.8368302583694458, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -117.27407836914062, "logps_train/policy_1_l": -98.99213409423828, "logps_train/policy_1_w": -65.22329711914062, "logps_train/policy_2_2": -93.35123443603516, "logps_train/policy_2_w": -76.90650939941406, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.1194669008255005, "rewards_train/1-l": -0.6419504880905151, "rewards_train/1-w": 1.1392297744750977, "rewards_train/2-2": 1.7734706401824951, "rewards_train/2-w": 0.8027091026306152, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7811802625656128, "rewards_train/margins_1": 0.019762873649597168, "rewards_train/margins_2": 0.9707615375518799, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -164.79554748535156, "logps_train/policy_1_l": -110.22793579101562, "logps_train/policy_1_w": -103.72456359863281, "logps_train/policy_2_2": -136.19876098632812, "logps_train/policy_2_w": -133.0265350341797, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.6016945838928223, "rewards_train/1-l": -1.1665436029434204, "rewards_train/1-w": 2.258793830871582, "rewards_train/2-2": 2.494185447692871, "rewards_train/2-w": 1.5942217111587524, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4253374338150024, "rewards_train/margins_1": 0.6570992469787598, "rewards_train/margins_2": 0.8999637365341187, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -174.7888641357422, "logps_train/policy_1_l": -265.2760009765625, "logps_train/policy_1_w": -196.1727752685547, "logps_train/policy_2_2": -132.87667846679688, "logps_train/policy_2_w": -257.3524475097656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.5273631811141968, "rewards_train/1-l": -4.087951183319092, "rewards_train/1-w": 2.3620200157165527, "rewards_train/2-2": 2.731081962585449, "rewards_train/2-w": 0.51006680727005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.4499711990356445, "rewards_train/margins_1": 0.834656834602356, "rewards_train/margins_2": 2.221015155315399, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -153.6899871826172, "logps_train/policy_1_l": -142.21937561035156, "logps_train/policy_1_w": -97.02507019042969, "logps_train/policy_2_2": -114.62327575683594, "logps_train/policy_2_w": -122.97196960449219, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.398188591003418, "rewards_train/1-l": -0.7686161994934082, "rewards_train/1-w": 1.7185864448547363, "rewards_train/2-2": 2.525953769683838, "rewards_train/2-w": 1.0692094564437866, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.4872026443481445, "rewards_train/margins_1": 0.32039785385131836, "rewards_train/margins_2": 1.4567443132400513, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -224.43423461914062, "logps_train/policy_1_l": -311.26019287109375, "logps_train/policy_1_w": -196.05322265625, "logps_train/policy_2_2": -180.06240844726562, "logps_train/policy_2_w": -253.7478790283203, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -290.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.303452968597412, "rewards_train/1-l": -2.1408634185791016, "rewards_train/1-w": 2.779834270477295, "rewards_train/2-2": 2.431260108947754, "rewards_train/2-w": 1.3181803226470947, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.9206976890563965, "rewards_train/margins_1": 1.4763813018798828, "rewards_train/margins_2": 1.1130797863006592, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -159.1593017578125, "logps_train/policy_1_l": -145.25173950195312, "logps_train/policy_1_w": -158.44522094726562, "logps_train/policy_2_2": -131.23983764648438, "logps_train/policy_2_w": -194.03427124023438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.8309451341629028, "rewards_train/1-l": -0.47751688957214355, "rewards_train/1-w": 2.252352237701416, "rewards_train/2-2": 2.50101637840271, "rewards_train/2-w": 0.9676669836044312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7298691272735596, "rewards_train/margins_1": 0.4214071035385132, "rewards_train/margins_2": 1.5333493947982788, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -139.8398895263672, "logps_train/policy_1_l": -109.19284057617188, "logps_train/policy_1_w": -66.7977294921875, "logps_train/policy_2_2": -116.93557739257812, "logps_train/policy_2_w": -91.92059326171875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.1826133728027344, "rewards_train/1-l": -0.6448702216148376, "rewards_train/1-w": 2.1862428188323975, "rewards_train/2-2": 2.0961878299713135, "rewards_train/2-w": 1.5860657691955566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.831113040447235, "rewards_train/margins_1": 1.003629446029663, "rewards_train/margins_2": 0.5101220607757568, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -161.10971069335938, "logps_train/policy_1_l": -166.6773681640625, "logps_train/policy_1_w": -170.92092895507812, "logps_train/policy_2_2": -133.22039794921875, "logps_train/policy_2_w": -191.71728515625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.2663722038269043, "rewards_train/1-l": -1.2342169284820557, "rewards_train/1-w": 2.7825169563293457, "rewards_train/2-2": 2.0553030967712402, "rewards_train/2-w": 2.1929190158843994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.016733884811401, "rewards_train/margins_1": 1.5161447525024414, "rewards_train/margins_2": -0.13761591911315918, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -176.63314819335938, "logps_train/policy_1_l": -116.07634735107422, "logps_train/policy_1_w": -119.28070068359375, "logps_train/policy_2_2": -116.28012084960938, "logps_train/policy_2_w": -155.112548828125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.1523092985153198, "rewards_train/1-l": -0.7826342582702637, "rewards_train/1-w": 2.4563045501708984, "rewards_train/2-2": 2.73370623588562, "rewards_train/2-w": 1.4715580940246582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.238938808441162, "rewards_train/margins_1": 1.3039952516555786, "rewards_train/margins_2": 1.262148141860962, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -117.72589111328125, "logps_train/policy_1_l": -146.47645568847656, "logps_train/policy_1_w": -105.34856414794922, "logps_train/policy_2_2": -93.50130462646484, "logps_train/policy_2_w": -131.10801696777344, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.1090521812438965, "rewards_train/1-l": -1.1091691255569458, "rewards_train/1-w": 2.0788156986236572, "rewards_train/2-2": 1.8053386211395264, "rewards_train/2-w": 1.1931040287017822, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.187984824180603, "rewards_train/margins_1": 0.9697635173797607, "rewards_train/margins_2": 0.6122345924377441, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -178.12026977539062, "logps_train/policy_1_l": -186.6150665283203, "logps_train/policy_1_w": -140.1637725830078, "logps_train/policy_2_2": -155.37890625, "logps_train/policy_2_w": -169.66909790039062, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.36922287940979, "rewards_train/1-l": -1.459943413734436, "rewards_train/1-w": 2.0773730278015137, "rewards_train/2-2": 2.162109851837158, "rewards_train/2-w": 1.5080902576446533, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5373164415359497, "rewards_train/margins_1": 0.7081501483917236, "rewards_train/margins_2": 0.6540195941925049, "step": 171 }, { "epoch": 0.51, "learning_rate": 4.441309656795106e-06, "loss": 0.7215, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -108.87641143798828, "logps_train/policy_1_l": -134.04930114746094, "logps_train/policy_1_w": -135.10330200195312, "logps_train/policy_2_2": -81.17459869384766, "logps_train/policy_2_w": -180.10073852539062, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.6389214992523193, "rewards_train/1-l": -0.9515600204467773, "rewards_train/1-w": 1.8677952289581299, "rewards_train/2-2": 2.249727964401245, "rewards_train/2-w": 0.8524265289306641, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8193552494049072, "rewards_train/margins_1": 0.22887372970581055, "rewards_train/margins_2": 1.397301435470581, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -106.88182067871094, "logps_train/policy_1_l": -102.69709777832031, "logps_train/policy_1_w": -70.97650146484375, "logps_train/policy_2_2": -86.1304931640625, "logps_train/policy_2_w": -91.34198760986328, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 0.8364019393920898, "rewards_train/1-l": -0.6679515242576599, "rewards_train/1-w": 1.9414128065109253, "rewards_train/2-2": 1.628454566001892, "rewards_train/2-w": 1.6158009767532349, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.609364330768585, "rewards_train/margins_1": 1.1050108671188354, "rewards_train/margins_2": 0.012653589248657227, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -163.91339111328125, "logps_train/policy_1_l": -225.0792236328125, "logps_train/policy_1_w": -136.58547973632812, "logps_train/policy_2_2": -128.83120727539062, "logps_train/policy_2_w": -178.2762451171875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.8578802943229675, "rewards_train/1-l": -3.0454232692718506, "rewards_train/1-w": 2.686764717102051, "rewards_train/2-2": 1.9340661764144897, "rewards_train/2-w": 1.7223749160766602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.732187986373901, "rewards_train/margins_1": 1.8288844227790833, "rewards_train/margins_2": 0.2116912603378296, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -159.3469696044922, "logps_train/policy_1_l": -163.80128479003906, "logps_train/policy_1_w": -125.0452880859375, "logps_train/policy_2_2": -134.84799194335938, "logps_train/policy_2_w": -150.95848083496094, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.0496779680252075, "rewards_train/1-l": -1.3721215724945068, "rewards_train/1-w": 0.8267215490341187, "rewards_train/2-2": 1.7183256149291992, "rewards_train/2-w": 0.24946439266204834, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1988431215286255, "rewards_train/margins_1": -0.22295641899108887, "rewards_train/margins_2": 1.4688612222671509, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -123.77754974365234, "logps_train/policy_1_l": -97.05271911621094, "logps_train/policy_1_w": -65.14647674560547, "logps_train/policy_2_2": -106.77083587646484, "logps_train/policy_2_w": -80.93528747558594, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 1.5909945964813232, "rewards_train/1-l": -1.0083978176116943, "rewards_train/1-w": 1.3107434511184692, "rewards_train/2-2": 2.050260066986084, "rewards_train/2-w": 0.9260031580924988, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3191412687301636, "rewards_train/margins_1": -0.280251145362854, "rewards_train/margins_2": 1.1242569088935852, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -107.91986083984375, "logps_train/policy_1_l": -87.79574584960938, "logps_train/policy_1_w": -101.7847671508789, "logps_train/policy_2_2": -83.79147338867188, "logps_train/policy_2_w": -115.19332885742188, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8087946176528931, "rewards_train/1-l": -0.42293408513069153, "rewards_train/1-w": 1.3254295587539673, "rewards_train/2-2": 1.5399930477142334, "rewards_train/2-w": 0.8158236145973206, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7483636438846588, "rewards_train/margins_1": 0.5166349411010742, "rewards_train/margins_2": 0.7241694331169128, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -174.0906219482422, "logps_train/policy_1_l": -159.42347717285156, "logps_train/policy_1_w": -106.84318542480469, "logps_train/policy_2_2": -147.3636474609375, "logps_train/policy_2_w": -124.59651947021484, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.2800002098083496, "rewards_train/1-l": -1.539223074913025, "rewards_train/1-w": 1.30630624294281, "rewards_train/2-2": 1.8792610168457031, "rewards_train/2-w": 0.9262858033180237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.845529317855835, "rewards_train/margins_1": 0.02630603313446045, "rewards_train/margins_2": 0.9529752135276794, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -183.27261352539062, "logps_train/policy_1_l": -173.41102600097656, "logps_train/policy_1_w": -175.55923461914062, "logps_train/policy_2_2": -147.70596313476562, "logps_train/policy_2_w": -228.44589233398438, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": 1.3946144580841064, "rewards_train/1-l": -1.7731339931488037, "rewards_train/1-w": 2.5925133228302, "rewards_train/2-2": 2.2387800216674805, "rewards_train/2-w": 0.8210353851318359, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.365647315979004, "rewards_train/margins_1": 1.1978988647460938, "rewards_train/margins_2": 1.4177446365356445, "step": 172 }, { "epoch": 0.52, "logps_train/policy_1_2": -206.48687744140625, "logps_train/policy_1_l": -187.35079956054688, "logps_train/policy_1_w": -208.7515869140625, "logps_train/policy_2_2": -165.92153930664062, "logps_train/policy_2_w": -257.252685546875, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.441936731338501, "rewards_train/1-l": -1.955392599105835, "rewards_train/1-w": 3.1935911178588867, "rewards_train/2-2": 2.426595449447632, "rewards_train/2-w": 1.835671067237854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.148983716964722, "rewards_train/margins_1": 1.7516543865203857, "rewards_train/margins_2": 0.5909243822097778, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -224.8888397216797, "logps_train/policy_1_l": -235.03973388671875, "logps_train/policy_1_w": -205.1492156982422, "logps_train/policy_2_2": -185.1741485595703, "logps_train/policy_2_w": -241.66297912597656, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.1423656940460205, "rewards_train/1-l": -2.3203799724578857, "rewards_train/1-w": 3.1016793251037598, "rewards_train/2-2": 2.3302409648895264, "rewards_train/2-w": 1.7712026834487915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.4220592975616455, "rewards_train/margins_1": 1.9593136310577393, "rewards_train/margins_2": 0.5590382814407349, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -85.64900207519531, "logps_train/policy_1_l": -60.61186218261719, "logps_train/policy_1_w": -44.79890441894531, "logps_train/policy_2_2": -74.79886627197266, "logps_train/policy_2_w": -55.13297653198242, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -53.75, "logps_train/ref_1_w": -51.25, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -58.0, "rewards_train/1-2": 0.5362714529037476, "rewards_train/1-l": -0.6826707124710083, "rewards_train/1-w": 0.6533128023147583, "rewards_train/2-2": 0.701363742351532, "rewards_train/2-w": 0.27732735872268677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3359835147857666, "rewards_train/margins_1": 0.11704134941101074, "rewards_train/margins_2": 0.4240363836288452, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -171.12234497070312, "logps_train/policy_1_l": -217.77272033691406, "logps_train/policy_1_w": -167.8795928955078, "logps_train/policy_2_2": -129.52615356445312, "logps_train/policy_2_w": -232.23049926757812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.4252650737762451, "rewards_train/1-l": -2.4390883445739746, "rewards_train/1-w": 2.637040853500366, "rewards_train/2-2": 2.4380102157592773, "rewards_train/2-w": 0.5894505977630615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.076129198074341, "rewards_train/margins_1": 1.211775779724121, "rewards_train/margins_2": 1.8485596179962158, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -94.81808471679688, "logps_train/policy_1_l": -84.43962097167969, "logps_train/policy_1_w": -86.31275939941406, "logps_train/policy_2_2": -82.26193237304688, "logps_train/policy_2_w": -94.14237976074219, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.1447536945343018, "rewards_train/1-l": -0.5017744898796082, "rewards_train/1-w": 1.2269763946533203, "rewards_train/2-2": 1.642068862915039, "rewards_train/2-w": 0.9870798587799072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7287508845329285, "rewards_train/margins_1": 0.08222270011901855, "rewards_train/margins_2": 0.6549890041351318, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -248.20538330078125, "logps_train/policy_1_l": -218.96954345703125, "logps_train/policy_1_w": -198.9520721435547, "logps_train/policy_2_2": -204.53529357910156, "logps_train/policy_2_w": -231.35079956054688, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.5013375282287598, "rewards_train/1-l": -1.2692208290100098, "rewards_train/1-w": 4.0211992263793945, "rewards_train/2-2": 2.5214710235595703, "rewards_train/2-w": 2.946169853210449, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 5.290420055389404, "rewards_train/margins_1": 2.5198616981506348, "rewards_train/margins_2": -0.4246988296508789, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -158.91583251953125, "logps_train/policy_1_l": -171.5931396484375, "logps_train/policy_1_w": -101.88558197021484, "logps_train/policy_2_2": -135.4294891357422, "logps_train/policy_2_w": -123.89018249511719, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.5738965272903442, "rewards_train/1-l": -1.8157103061676025, "rewards_train/1-w": 1.484488844871521, "rewards_train/2-2": 1.8750447034835815, "rewards_train/2-w": 1.0719197988510132, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3001991510391235, "rewards_train/margins_1": -0.08940768241882324, "rewards_train/margins_2": 0.8031249046325684, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -109.85331726074219, "logps_train/policy_1_l": -146.01429748535156, "logps_train/policy_1_w": -91.40725708007812, "logps_train/policy_2_2": -98.09309387207031, "logps_train/policy_2_w": -111.95021057128906, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.1076364517211914, "rewards_train/1-l": -1.562319278717041, "rewards_train/1-w": 1.6108372211456299, "rewards_train/2-2": 1.47506582736969, "rewards_train/2-w": 1.2362291812896729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.173156499862671, "rewards_train/margins_1": 0.5032007694244385, "rewards_train/margins_2": 0.2388366460800171, "step": 173 }, { "epoch": 0.52, "learning_rate": 4.425653231231344e-06, "loss": 0.8895, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -244.71585083007812, "logps_train/policy_1_l": -172.7014923095703, "logps_train/policy_1_w": -142.19375610351562, "logps_train/policy_2_2": -190.0358123779297, "logps_train/policy_2_w": -179.87332153320312, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.0456023216247559, "rewards_train/1-l": -1.869368076324463, "rewards_train/1-w": 2.7860918045043945, "rewards_train/2-2": 3.1776700019836426, "rewards_train/2-w": 2.073606014251709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.655459880828857, "rewards_train/margins_1": 1.7404894828796387, "rewards_train/margins_2": 1.1040639877319336, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -126.39912414550781, "logps_train/policy_1_l": -113.35462951660156, "logps_train/policy_1_w": -124.77445983886719, "logps_train/policy_2_2": -107.91253662109375, "logps_train/policy_2_w": -154.02540588378906, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.0038373470306396, "rewards_train/1-l": -0.41593223810195923, "rewards_train/1-w": 1.2897417545318604, "rewards_train/2-2": 1.3032774925231934, "rewards_train/2-w": 0.48652228713035583, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7056739926338196, "rewards_train/margins_1": 0.2859044075012207, "rewards_train/margins_2": 0.8167552053928375, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -104.51738739013672, "logps_train/policy_1_l": -76.69979858398438, "logps_train/policy_1_w": -71.40605163574219, "logps_train/policy_2_2": -85.9969711303711, "logps_train/policy_2_w": -85.36567687988281, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -91.5, "rewards_train/1-2": 0.9716988801956177, "rewards_train/1-l": -0.8891202211380005, "rewards_train/1-w": 1.0832228660583496, "rewards_train/2-2": 1.3831154108047485, "rewards_train/2-w": 0.6157764196395874, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.97234308719635, "rewards_train/margins_1": 0.11152398586273193, "rewards_train/margins_2": 0.7673389911651611, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -158.06411743164062, "logps_train/policy_1_l": -131.94692993164062, "logps_train/policy_1_w": -142.44659423828125, "logps_train/policy_2_2": -124.12132263183594, "logps_train/policy_2_w": -172.70620727539062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.4388997554779053, "rewards_train/1-l": -1.2806315422058105, "rewards_train/1-w": 2.738152265548706, "rewards_train/2-2": 2.2909929752349854, "rewards_train/2-w": 1.9575034379959106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.018783807754517, "rewards_train/margins_1": 1.2992525100708008, "rewards_train/margins_2": 0.3334895372390747, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -128.63931274414062, "logps_train/policy_1_l": -123.60427856445312, "logps_train/policy_1_w": -119.74034118652344, "logps_train/policy_2_2": -102.78494262695312, "logps_train/policy_2_w": -143.08511352539062, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.8954441547393799, "rewards_train/1-l": -1.0147247314453125, "rewards_train/1-w": 2.0994038581848145, "rewards_train/2-2": 1.5847865343093872, "rewards_train/2-w": 1.7008644342422485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.114128589630127, "rewards_train/margins_1": 1.2039597034454346, "rewards_train/margins_2": -0.11607789993286133, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -82.06501770019531, "logps_train/policy_1_l": -107.44734954833984, "logps_train/policy_1_w": -83.47696685791016, "logps_train/policy_2_2": -64.22164916992188, "logps_train/policy_2_w": -137.22024536132812, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.0638107061386108, "rewards_train/1-l": -1.305672287940979, "rewards_train/1-w": 2.3023033142089844, "rewards_train/2-2": 1.2825227975845337, "rewards_train/2-w": 1.093599796295166, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6079756021499634, "rewards_train/margins_1": 1.2384926080703735, "rewards_train/margins_2": 0.18892300128936768, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -142.19192504882812, "logps_train/policy_1_l": -90.5400619506836, "logps_train/policy_1_w": -112.64825439453125, "logps_train/policy_2_2": -118.0635757446289, "logps_train/policy_2_w": -137.47543334960938, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3944785594940186, "rewards_train/1-l": -1.0331075191497803, "rewards_train/1-w": 2.0884957313537598, "rewards_train/2-2": 2.022158145904541, "rewards_train/2-w": 1.3266748189926147, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.12160325050354, "rewards_train/margins_1": 0.6940171718597412, "rewards_train/margins_2": 0.6954833269119263, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -138.23806762695312, "logps_train/policy_1_l": -132.5619659423828, "logps_train/policy_1_w": -128.7736053466797, "logps_train/policy_2_2": -107.1202163696289, "logps_train/policy_2_w": -166.1782989501953, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.3933799266815186, "rewards_train/1-l": -1.6645947694778442, "rewards_train/1-w": 1.9960769414901733, "rewards_train/2-2": 2.337197780609131, "rewards_train/2-w": 0.7649832963943481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6606717109680176, "rewards_train/margins_1": 0.6026970148086548, "rewards_train/margins_2": 1.5722144842147827, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -199.12770080566406, "logps_train/policy_1_l": -219.25277709960938, "logps_train/policy_1_w": -154.25111389160156, "logps_train/policy_2_2": -164.34344482421875, "logps_train/policy_2_w": -173.17745971679688, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.9778549671173096, "rewards_train/1-l": -2.375277042388916, "rewards_train/1-w": 2.8123884201049805, "rewards_train/2-2": 3.06565523147583, "rewards_train/2-w": 2.2353787422180176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.1876654624938965, "rewards_train/margins_1": 0.8345334529876709, "rewards_train/margins_2": 0.8302764892578125, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -143.71499633789062, "logps_train/policy_1_l": -133.7943572998047, "logps_train/policy_1_w": -173.54055786132812, "logps_train/policy_2_2": -109.57892608642578, "logps_train/policy_2_w": -211.63491821289062, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.4136579036712646, "rewards_train/1-l": -1.282755970954895, "rewards_train/1-w": 1.0412558317184448, "rewards_train/2-2": 2.1481621265411377, "rewards_train/2-w": -0.5400547385215759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.32401180267334, "rewards_train/margins_1": -0.3724020719528198, "rewards_train/margins_2": 2.6882168650627136, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -169.9112548828125, "logps_train/policy_1_l": -173.52899169921875, "logps_train/policy_1_w": -136.24685668945312, "logps_train/policy_2_2": -146.2296142578125, "logps_train/policy_2_w": -184.96237182617188, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.040124535560608, "rewards_train/1-l": -0.610028088092804, "rewards_train/1-w": 1.4383013248443604, "rewards_train/2-2": 1.6067262887954712, "rewards_train/2-w": 0.22720077633857727, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.0483294129371643, "rewards_train/margins_1": 0.39817678928375244, "rewards_train/margins_2": 1.379525512456894, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -110.69536590576172, "logps_train/policy_1_l": -162.82061767578125, "logps_train/policy_1_w": -129.49578857421875, "logps_train/policy_2_2": -84.80187225341797, "logps_train/policy_2_w": -171.60092163085938, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.961322546005249, "rewards_train/1-l": -2.5546460151672363, "rewards_train/1-w": 1.4957334995269775, "rewards_train/2-2": 1.6938362121582031, "rewards_train/2-w": 0.08287601172924042, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.050379514694214, "rewards_train/margins_1": 0.5344109535217285, "rewards_train/margins_2": 1.6109602004289627, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -195.14596557617188, "logps_train/policy_1_l": -209.46365356445312, "logps_train/policy_1_w": -132.29751586914062, "logps_train/policy_2_2": -156.51055908203125, "logps_train/policy_2_w": -170.31414794921875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.2354042530059814, "rewards_train/1-l": -2.5979275703430176, "rewards_train/1-w": 2.4733726978302, "rewards_train/2-2": 2.4301939010620117, "rewards_train/2-w": 1.8185852766036987, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.071300268173218, "rewards_train/margins_1": 1.2379684448242188, "rewards_train/margins_2": 0.611608624458313, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -127.21011352539062, "logps_train/policy_1_l": -169.28225708007812, "logps_train/policy_1_w": -128.9697265625, "logps_train/policy_2_2": -107.02352142333984, "logps_train/policy_2_w": -152.685302734375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.2348477840423584, "rewards_train/1-l": -1.0430693626403809, "rewards_train/1-w": 2.23427677154541, "rewards_train/2-2": 1.7390539646148682, "rewards_train/2-w": 1.4908442497253418, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.277346134185791, "rewards_train/margins_1": 0.9994289875030518, "rewards_train/margins_2": 0.24820971488952637, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -147.5612335205078, "logps_train/policy_1_l": -161.59544372558594, "logps_train/policy_1_w": -164.5159912109375, "logps_train/policy_2_2": -117.27716827392578, "logps_train/policy_2_w": -206.75274658203125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4548137187957764, "rewards_train/1-l": -1.504075288772583, "rewards_train/1-w": 2.3232054710388184, "rewards_train/2-2": 2.097282648086548, "rewards_train/2-w": 1.1116397380828857, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8272807598114014, "rewards_train/margins_1": 0.868391752243042, "rewards_train/margins_2": 0.9856429100036621, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -120.97914123535156, "logps_train/policy_1_l": -137.44786071777344, "logps_train/policy_1_w": -138.26913452148438, "logps_train/policy_2_2": -97.49528503417969, "logps_train/policy_2_w": -171.5074462890625, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.6645858287811279, "rewards_train/1-l": -1.2854115962982178, "rewards_train/1-w": 1.9975008964538574, "rewards_train/2-2": 1.4332835674285889, "rewards_train/2-w": 1.0797228813171387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.282912492752075, "rewards_train/margins_1": 1.3329150676727295, "rewards_train/margins_2": 0.3535606861114502, "step": 175 }, { "epoch": 0.53, "learning_rate": 4.409808865306932e-06, "loss": 0.8028, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -153.23095703125, "logps_train/policy_1_l": -163.28451538085938, "logps_train/policy_1_w": -176.3651885986328, "logps_train/policy_2_2": -118.09337615966797, "logps_train/policy_2_w": -219.62857055664062, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.310497760772705, "rewards_train/1-l": -1.1418293714523315, "rewards_train/1-w": 2.6232476234436035, "rewards_train/2-2": 1.9203503131866455, "rewards_train/2-w": 1.4058923721313477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.765076994895935, "rewards_train/margins_1": 1.3127498626708984, "rewards_train/margins_2": 0.5144579410552979, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -137.19456481933594, "logps_train/policy_1_l": -81.56657409667969, "logps_train/policy_1_w": -99.68966674804688, "logps_train/policy_2_2": -115.88108825683594, "logps_train/policy_2_w": -119.88220977783203, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.31804358959198, "rewards_train/1-l": -0.5144756436347961, "rewards_train/1-w": 1.3302520513534546, "rewards_train/2-2": 0.7947039604187012, "rewards_train/2-w": 0.9274041652679443, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.8447276949882507, "rewards_train/margins_1": 1.0122084617614746, "rewards_train/margins_2": -0.13270020484924316, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -221.65493774414062, "logps_train/policy_1_l": -327.08282470703125, "logps_train/policy_1_w": -208.8646240234375, "logps_train/policy_2_2": -182.3037109375, "logps_train/policy_2_w": -253.27117919921875, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -282.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.4087252616882324, "rewards_train/1-l": -4.518050193786621, "rewards_train/1-w": 2.6322884559631348, "rewards_train/2-2": 2.273536205291748, "rewards_train/2-w": 1.5666327476501465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.150338649749756, "rewards_train/margins_1": 1.2235631942749023, "rewards_train/margins_2": 0.7069034576416016, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -219.10540771484375, "logps_train/policy_1_l": -154.6209716796875, "logps_train/policy_1_w": -96.49178314208984, "logps_train/policy_2_2": -179.7376251220703, "logps_train/policy_2_w": -117.53378295898438, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.5230522751808167, "rewards_train/1-l": -2.3472533226013184, "rewards_train/1-w": 2.5851964950561523, "rewards_train/2-2": 2.156217336654663, "rewards_train/2-w": 2.2059969902038574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.932449817657471, "rewards_train/margins_1": 2.0621442198753357, "rewards_train/margins_2": -0.049779653549194336, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -72.13705444335938, "logps_train/policy_1_l": -25.049543380737305, "logps_train/policy_1_w": -56.725502014160156, "logps_train/policy_2_2": -56.400001525878906, "logps_train/policy_2_w": -71.51995849609375, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -22.5, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.9466951489448547, "rewards_train/1-l": -0.2484600692987442, "rewards_train/1-w": 1.1628506183624268, "rewards_train/2-2": 1.5858790874481201, "rewards_train/2-w": 0.8324764966964722, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.411310687661171, "rewards_train/margins_1": 0.21615546941757202, "rewards_train/margins_2": 0.753402590751648, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -162.8402862548828, "logps_train/policy_1_l": -180.89340209960938, "logps_train/policy_1_w": -130.18618774414062, "logps_train/policy_2_2": -132.35009765625, "logps_train/policy_2_w": -175.40576171875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.3879443407058716, "rewards_train/1-l": -0.39051321148872375, "rewards_train/1-w": 2.148862600326538, "rewards_train/2-2": 1.1634280681610107, "rewards_train/2-w": 0.47622019052505493, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.539375811815262, "rewards_train/margins_1": 1.7609182596206665, "rewards_train/margins_2": 0.6872078776359558, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -84.04570007324219, "logps_train/policy_1_l": -96.58837127685547, "logps_train/policy_1_w": -76.53236389160156, "logps_train/policy_2_2": -62.3577766418457, "logps_train/policy_2_w": -105.0838623046875, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.6204299926757812, "rewards_train/1-l": -1.2389159202575684, "rewards_train/1-w": 1.8670763969421387, "rewards_train/2-2": 0.81734699010849, "rewards_train/2-w": 0.9681770205497742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.105992317199707, "rewards_train/margins_1": 1.2466464042663574, "rewards_train/margins_2": -0.15083003044128418, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -202.78189086914062, "logps_train/policy_1_l": -282.24188232421875, "logps_train/policy_1_w": -174.8023681640625, "logps_train/policy_2_2": -154.51666259765625, "logps_train/policy_2_w": -235.8870849609375, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.6093101501464844, "rewards_train/1-l": -2.214815616607666, "rewards_train/1-w": 2.06976318359375, "rewards_train/2-2": 2.5483343601226807, "rewards_train/2-w": 0.35504186153411865, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.284578800201416, "rewards_train/margins_1": 0.4604530334472656, "rewards_train/margins_2": 2.193292498588562, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -138.0616455078125, "logps_train/policy_1_l": -162.93495178222656, "logps_train/policy_1_w": -158.63150024414062, "logps_train/policy_2_2": -112.8171615600586, "logps_train/policy_2_w": -187.61688232421875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.5969600677490234, "rewards_train/1-l": -1.7722055912017822, "rewards_train/1-w": 2.824349880218506, "rewards_train/2-2": 2.4917218685150146, "rewards_train/2-w": 1.6500303745269775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.596555471420288, "rewards_train/margins_1": 1.2273898124694824, "rewards_train/margins_2": 0.8416914939880371, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -111.17295837402344, "logps_train/policy_1_l": -99.53058624267578, "logps_train/policy_1_w": -122.15514373779297, "logps_train/policy_2_2": -96.89498901367188, "logps_train/policy_2_w": -152.76568603515625, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.0461809635162354, "rewards_train/1-l": -0.5825504660606384, "rewards_train/1-w": 1.8962044715881348, "rewards_train/2-2": 1.5927280187606812, "rewards_train/2-w": 0.9560493230819702, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.478754937648773, "rewards_train/margins_1": 0.8500235080718994, "rewards_train/margins_2": 0.6366786956787109, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -103.75921630859375, "logps_train/policy_1_l": -124.08340454101562, "logps_train/policy_1_w": -111.98297882080078, "logps_train/policy_2_2": -84.03968048095703, "logps_train/policy_2_w": -139.17404174804688, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.533453106880188, "rewards_train/1-l": -0.8546298742294312, "rewards_train/1-w": 2.5188896656036377, "rewards_train/2-2": 1.8702504634857178, "rewards_train/2-w": 1.9263452291488647, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.373519539833069, "rewards_train/margins_1": 0.9854365587234497, "rewards_train/margins_2": -0.05609476566314697, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -105.7540283203125, "logps_train/policy_1_l": -157.88931274414062, "logps_train/policy_1_w": -112.83346557617188, "logps_train/policy_2_2": -87.17231750488281, "logps_train/policy_2_w": -139.59573364257812, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.63397216796875, "rewards_train/1-l": -1.0267720222473145, "rewards_train/1-w": 1.8869658708572388, "rewards_train/2-2": 0.7069869637489319, "rewards_train/2-w": 1.184175729751587, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.9137378931045532, "rewards_train/margins_1": 1.2529937028884888, "rewards_train/margins_2": -0.47718876600265503, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -106.27000427246094, "logps_train/policy_1_l": -99.93226623535156, "logps_train/policy_1_w": -83.82557678222656, "logps_train/policy_2_2": -99.06307983398438, "logps_train/policy_2_w": -96.45259857177734, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 1.7245620489120483, "rewards_train/1-l": -1.1799451112747192, "rewards_train/1-w": 1.330723762512207, "rewards_train/2-2": 1.9038474559783936, "rewards_train/2-w": 0.9305211305618286, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5106688737869263, "rewards_train/margins_1": -0.3938382863998413, "rewards_train/margins_2": 0.9733263254165649, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -130.5709228515625, "logps_train/policy_1_l": -157.03240966796875, "logps_train/policy_1_w": -134.8207550048828, "logps_train/policy_2_2": -101.14604187011719, "logps_train/policy_2_w": -181.587646484375, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.8569711446762085, "rewards_train/1-l": -1.57765531539917, "rewards_train/1-w": 2.671049118041992, "rewards_train/2-2": 2.404146194458008, "rewards_train/2-w": 1.5396727323532104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.248704433441162, "rewards_train/margins_1": 0.8140779733657837, "rewards_train/margins_2": 0.8644734621047974, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -102.32239532470703, "logps_train/policy_1_l": -155.08670043945312, "logps_train/policy_1_w": -90.34097290039062, "logps_train/policy_2_2": -83.71734619140625, "logps_train/policy_2_w": -104.7812271118164, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.6966668367385864, "rewards_train/1-l": -2.03562331199646, "rewards_train/1-w": 1.3190282583236694, "rewards_train/2-2": 1.30775785446167, "rewards_train/2-w": 1.1687521934509277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3546515703201294, "rewards_train/margins_1": 0.622361421585083, "rewards_train/margins_2": 0.1390056610107422, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -235.88485717773438, "logps_train/policy_1_l": -168.76055908203125, "logps_train/policy_1_w": -104.1951675415039, "logps_train/policy_2_2": -197.13623046875, "logps_train/policy_2_w": -135.42776489257812, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.7755762338638306, "rewards_train/1-l": -1.0104312896728516, "rewards_train/1-w": 1.699233055114746, "rewards_train/2-2": 2.7457520961761475, "rewards_train/2-w": 0.7822240591049194, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7096643447875977, "rewards_train/margins_1": -0.07634317874908447, "rewards_train/margins_2": 1.963528037071228, "step": 177 }, { "epoch": 0.53, "learning_rate": 4.393778105404051e-06, "loss": 0.8579, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -65.18770599365234, "logps_train/policy_1_l": -74.04574584960938, "logps_train/policy_1_w": -71.44454193115234, "logps_train/policy_2_2": -56.098365783691406, "logps_train/policy_2_w": -82.20526885986328, "logps_train/ref_1_2": -77.5, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.2152141332626343, "rewards_train/1-l": -0.8565281629562378, "rewards_train/1-w": 1.3078898191452026, "rewards_train/2-2": 1.4177026748657227, "rewards_train/2-w": 0.973223090171814, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1644179821014404, "rewards_train/margins_1": 0.09267568588256836, "rewards_train/margins_2": 0.4444795846939087, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -206.23655700683594, "logps_train/policy_1_l": -189.25009155273438, "logps_train/policy_1_w": -150.89456176757812, "logps_train/policy_2_2": -166.85772705078125, "logps_train/policy_2_w": -194.47299194335938, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.082595109939575, "rewards_train/1-l": -1.4761803150177002, "rewards_train/1-w": 2.6562459468841553, "rewards_train/2-2": 3.468914747238159, "rewards_train/2-w": 1.6038724184036255, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.1324262619018555, "rewards_train/margins_1": 0.5736508369445801, "rewards_train/margins_2": 1.8650423288345337, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -147.39505004882812, "logps_train/policy_1_l": -159.74440002441406, "logps_train/policy_1_w": -158.4427947998047, "logps_train/policy_2_2": -123.81542205810547, "logps_train/policy_2_w": -181.97183227539062, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.5763151049613953, "rewards_train/1-l": -1.6429941654205322, "rewards_train/1-w": 2.3932204246520996, "rewards_train/2-2": 1.5514659881591797, "rewards_train/2-w": 1.5653165578842163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.036214590072632, "rewards_train/margins_1": 1.8169053196907043, "rewards_train/margins_2": -0.013850569725036621, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -209.70925903320312, "logps_train/policy_1_l": -361.50189208984375, "logps_train/policy_1_w": -136.89630126953125, "logps_train/policy_2_2": -167.90011596679688, "logps_train/policy_2_w": -173.62588500976562, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -326.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.8900116682052612, "rewards_train/1-l": -3.5662035942077637, "rewards_train/1-w": 2.6931824684143066, "rewards_train/2-2": 2.8010032176971436, "rewards_train/2-w": 1.85616135597229, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.25938606262207, "rewards_train/margins_1": 0.8031708002090454, "rewards_train/margins_2": 0.9448418617248535, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -178.6971435546875, "logps_train/policy_1_l": -173.45248413085938, "logps_train/policy_1_w": -166.8644256591797, "logps_train/policy_2_2": -146.80316162109375, "logps_train/policy_2_w": -195.5880584716797, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.384974479675293, "rewards_train/1-l": -1.2579917907714844, "rewards_train/1-w": 2.030745029449463, "rewards_train/2-2": 2.2966363430023193, "rewards_train/2-w": 1.244319200515747, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2887368202209473, "rewards_train/margins_1": 0.6457705497741699, "rewards_train/margins_2": 1.0523171424865723, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -199.3076171875, "logps_train/policy_1_l": -213.2707061767578, "logps_train/policy_1_w": -262.89544677734375, "logps_train/policy_2_2": -155.11526489257812, "logps_train/policy_2_w": -334.127197265625, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -304.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -352.0, "rewards_train/1-2": 1.5754876136779785, "rewards_train/1-l": -2.114570140838623, "rewards_train/1-w": 4.035453796386719, "rewards_train/2-2": 2.5478498935699463, "rewards_train/2-w": 1.7458747625350952, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.150023937225342, "rewards_train/margins_1": 2.4599661827087402, "rewards_train/margins_2": 0.8019751310348511, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -130.6241455078125, "logps_train/policy_1_l": -109.58671569824219, "logps_train/policy_1_w": -140.9341583251953, "logps_train/policy_2_2": -107.6260757446289, "logps_train/policy_2_w": -176.75091552734375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.112585425376892, "rewards_train/1-l": -0.9645314812660217, "rewards_train/1-w": 2.0440850257873535, "rewards_train/2-2": 1.4764556884765625, "rewards_train/2-w": 1.249908447265625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0086165070533752, "rewards_train/margins_1": 0.9314996004104614, "rewards_train/margins_2": 0.2265472412109375, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -155.9309539794922, "logps_train/policy_1_l": -159.8875732421875, "logps_train/policy_1_w": -105.06761932373047, "logps_train/policy_2_2": -132.12689208984375, "logps_train/policy_2_w": -123.96295928955078, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.3584671020507812, "rewards_train/1-l": -1.3836793899536133, "rewards_train/1-w": 2.5065197944641113, "rewards_train/2-2": 1.765434741973877, "rewards_train/2-w": 1.8990168571472168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.8901991844177246, "rewards_train/margins_1": 1.14805269241333, "rewards_train/margins_2": -0.13358211517333984, "step": 178 }, { "epoch": 0.54, "logps_train/policy_1_2": -166.6021728515625, "logps_train/policy_1_l": -141.05096435546875, "logps_train/policy_1_w": -153.95167541503906, "logps_train/policy_2_2": -131.2900390625, "logps_train/policy_2_w": -191.87710571289062, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.6384172439575195, "rewards_train/1-l": -1.2918161153793335, "rewards_train/1-w": 2.234128952026367, "rewards_train/2-2": 2.676074504852295, "rewards_train/2-w": 1.2212738990783691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5259450674057007, "rewards_train/margins_1": 0.5957117080688477, "rewards_train/margins_2": 1.4548006057739258, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -249.025634765625, "logps_train/policy_1_l": -157.91143798828125, "logps_train/policy_1_w": -152.97386169433594, "logps_train/policy_2_2": -212.1555633544922, "logps_train/policy_2_w": -191.4268341064453, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6755614280700684, "rewards_train/1-l": -1.1819655895233154, "rewards_train/1-w": 2.4823012351989746, "rewards_train/2-2": 2.736006259918213, "rewards_train/2-w": 1.8354414701461792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.66426682472229, "rewards_train/margins_1": 0.8067398071289062, "rewards_train/margins_2": 0.9005647897720337, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -124.09909057617188, "logps_train/policy_1_l": -144.50259399414062, "logps_train/policy_1_w": -70.3285140991211, "logps_train/policy_2_2": -101.47874450683594, "logps_train/policy_2_w": -93.10735321044922, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.7697786092758179, "rewards_train/1-l": -1.5115875005722046, "rewards_train/1-w": 1.2745702266693115, "rewards_train/2-2": 2.1240007877349854, "rewards_train/2-w": 0.7798891663551331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.786157727241516, "rewards_train/margins_1": -0.49520838260650635, "rewards_train/margins_2": 1.3441116213798523, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -234.80047607421875, "logps_train/policy_1_l": -245.18077087402344, "logps_train/policy_1_w": -220.8915252685547, "logps_train/policy_2_2": -205.56402587890625, "logps_train/policy_2_w": -259.720947265625, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.0293264389038086, "rewards_train/1-l": -2.3217885494232178, "rewards_train/1-w": 2.7920985221862793, "rewards_train/2-2": 2.859222650527954, "rewards_train/2-w": 1.6279053688049316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.113887071609497, "rewards_train/margins_1": 0.7627720832824707, "rewards_train/margins_2": 1.2313172817230225, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -180.955322265625, "logps_train/policy_1_l": -146.06265258789062, "logps_train/policy_1_w": -138.73492431640625, "logps_train/policy_2_2": -136.3823699951172, "logps_train/policy_2_w": -169.779296875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.5794687271118164, "rewards_train/1-l": -0.3674525320529938, "rewards_train/1-w": 1.8149287700653076, "rewards_train/2-2": 2.6023876667022705, "rewards_train/2-w": 1.5602993965148926, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1823813021183014, "rewards_train/margins_1": 0.2354600429534912, "rewards_train/margins_2": 1.042088270187378, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -210.7680206298828, "logps_train/policy_1_l": -200.77713012695312, "logps_train/policy_1_w": -256.562744140625, "logps_train/policy_2_2": -174.7257080078125, "logps_train/policy_2_w": -305.8356628417969, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -280.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 1.7419483661651611, "rewards_train/1-l": -0.8527147769927979, "rewards_train/1-w": 2.343724250793457, "rewards_train/2-2": 2.561803102493286, "rewards_train/2-w": 0.14299631118774414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.196439027786255, "rewards_train/margins_1": 0.6017758846282959, "rewards_train/margins_2": 2.418806791305542, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -131.0418243408203, "logps_train/policy_1_l": -137.16636657714844, "logps_train/policy_1_w": -114.15281677246094, "logps_train/policy_2_2": -103.26783752441406, "logps_train/policy_2_w": -147.22598266601562, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.6098796129226685, "rewards_train/1-l": -0.95472252368927, "rewards_train/1-w": 1.4095232486724854, "rewards_train/2-2": 2.522240161895752, "rewards_train/2-w": 0.4022060036659241, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3642457723617554, "rewards_train/margins_1": -0.2003563642501831, "rewards_train/margins_2": 2.120034158229828, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -176.94371032714844, "logps_train/policy_1_l": -227.07762145996094, "logps_train/policy_1_w": -191.8087158203125, "logps_train/policy_2_2": -156.65252685546875, "logps_train/policy_2_w": -215.1076202392578, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.6161754131317139, "rewards_train/1-l": -1.9620838165283203, "rewards_train/1-w": 3.0796737670898438, "rewards_train/2-2": 2.3491997718811035, "rewards_train/2-w": 2.4072070121765137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.041757583618164, "rewards_train/margins_1": 1.4634983539581299, "rewards_train/margins_2": -0.058007240295410156, "step": 179 }, { "epoch": 0.54, "learning_rate": 4.377562516096608e-06, "loss": 0.6857, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -193.8743133544922, "logps_train/policy_1_l": -151.55751037597656, "logps_train/policy_1_w": -171.48464965820312, "logps_train/policy_2_2": -157.7816619873047, "logps_train/policy_2_w": -208.507080078125, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.6828811168670654, "rewards_train/1-l": -0.9682513475418091, "rewards_train/1-w": 2.1827847957611084, "rewards_train/2-2": 2.798396348953247, "rewards_train/2-w": 1.1805415153503418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1510361433029175, "rewards_train/margins_1": 0.49990367889404297, "rewards_train/margins_2": 1.6178548336029053, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -221.43966674804688, "logps_train/policy_1_l": -224.361328125, "logps_train/policy_1_w": -195.1599884033203, "logps_train/policy_2_2": -173.4319305419922, "logps_train/policy_2_w": -248.2292022705078, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.233377456665039, "rewards_train/1-l": -1.808298945426941, "rewards_train/1-w": 3.1152517795562744, "rewards_train/2-2": 3.450556755065918, "rewards_train/2-w": 1.763016939163208, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.923550724983215, "rewards_train/margins_1": 0.8818743228912354, "rewards_train/margins_2": 1.68753981590271, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -54.36992263793945, "logps_train/policy_1_l": -66.74945831298828, "logps_train/policy_1_w": -70.64938354492188, "logps_train/policy_2_2": -45.58485794067383, "logps_train/policy_2_w": -79.34627532958984, "logps_train/ref_1_2": -60.25, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -52.5, "logps_train/ref_2_w": -91.5, "rewards_train/1-2": 0.5817574858665466, "rewards_train/1-l": -0.5917429327964783, "rewards_train/1-w": 1.3506864309310913, "rewards_train/2-2": 0.6729594469070435, "rewards_train/2-w": 1.209903597831726, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.9424293637275696, "rewards_train/margins_1": 0.7689289450645447, "rewards_train/margins_2": -0.5369441509246826, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -217.95660400390625, "logps_train/policy_1_l": -256.3082275390625, "logps_train/policy_1_w": -192.51318359375, "logps_train/policy_2_2": -177.86492919921875, "logps_train/policy_2_w": -244.87344360351562, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.30433988571167, "rewards_train/1-l": -1.8620744943618774, "rewards_train/1-w": 3.961181640625, "rewards_train/2-2": 3.288508176803589, "rewards_train/2-w": 2.8001551628112793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.823256134986877, "rewards_train/margins_1": 1.65684175491333, "rewards_train/margins_2": 0.48835301399230957, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -113.21311950683594, "logps_train/policy_1_l": -176.83489990234375, "logps_train/policy_1_w": -114.75750732421875, "logps_train/policy_2_2": -83.26991271972656, "logps_train/policy_2_w": -148.26181030273438, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.2060317993164062, "rewards_train/1-l": -2.0485286712646484, "rewards_train/1-w": 2.082061290740967, "rewards_train/2-2": 1.9308216571807861, "rewards_train/2-w": 1.2863178253173828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.130589962005615, "rewards_train/margins_1": 0.8760294914245605, "rewards_train/margins_2": 0.6445038318634033, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -142.56011962890625, "logps_train/policy_1_l": -126.93846893310547, "logps_train/policy_1_w": -142.54156494140625, "logps_train/policy_2_2": -120.01568603515625, "logps_train/policy_2_w": -166.23849487304688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.9299260377883911, "rewards_train/1-l": -1.3125972747802734, "rewards_train/1-w": 1.459906816482544, "rewards_train/2-2": 1.6203070878982544, "rewards_train/2-w": 1.0527136325836182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7725040912628174, "rewards_train/margins_1": 0.5299807786941528, "rewards_train/margins_2": 0.5675934553146362, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -184.47494506835938, "logps_train/policy_1_l": -185.1131134033203, "logps_train/policy_1_w": -110.7006607055664, "logps_train/policy_2_2": -159.59068298339844, "logps_train/policy_2_w": -136.64755249023438, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 2.329847812652588, "rewards_train/1-l": -1.5398271083831787, "rewards_train/1-w": 3.0643091201782227, "rewards_train/2-2": 3.006556510925293, "rewards_train/2-w": 2.4555575847625732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.604136228561401, "rewards_train/margins_1": 0.7344613075256348, "rewards_train/margins_2": 0.5509989261627197, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -112.6708755493164, "logps_train/policy_1_l": -113.8823471069336, "logps_train/policy_1_w": -104.81421661376953, "logps_train/policy_2_2": -90.96380615234375, "logps_train/policy_2_w": -131.69969177246094, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.082912564277649, "rewards_train/1-l": -1.4483907222747803, "rewards_train/1-w": 1.824828028678894, "rewards_train/2-2": 1.728618860244751, "rewards_train/2-w": 1.0175302028656006, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2732187509536743, "rewards_train/margins_1": 0.7419154644012451, "rewards_train/margins_2": 0.7110886573791504, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -132.88868713378906, "logps_train/policy_1_l": -123.36835479736328, "logps_train/policy_1_w": -101.63024139404297, "logps_train/policy_2_2": -110.56684112548828, "logps_train/policy_2_w": -126.54536437988281, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.3236315250396729, "rewards_train/1-l": -1.2676950693130493, "rewards_train/1-w": 1.5354129076004028, "rewards_train/2-2": 1.8825736045837402, "rewards_train/2-w": 1.319096326828003, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.803107976913452, "rewards_train/margins_1": 0.21178138256072998, "rewards_train/margins_2": 0.5634772777557373, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -83.73165893554688, "logps_train/policy_1_l": -121.0859375, "logps_train/policy_1_w": -72.62779235839844, "logps_train/policy_2_2": -65.73939514160156, "logps_train/policy_2_w": -95.92654418945312, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 0.794021487236023, "rewards_train/1-l": -1.0587900876998901, "rewards_train/1-w": 1.1680805683135986, "rewards_train/2-2": 1.0729360580444336, "rewards_train/2-w": 0.9385961294174194, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2268706560134888, "rewards_train/margins_1": 0.3740590810775757, "rewards_train/margins_2": 0.13433992862701416, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -104.75968170166016, "logps_train/policy_1_l": -99.18153381347656, "logps_train/policy_1_w": -70.56317138671875, "logps_train/policy_2_2": -81.47449493408203, "logps_train/policy_2_w": -89.2969970703125, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.6271562576293945, "rewards_train/1-l": -1.4008187055587769, "rewards_train/1-w": 1.1780579090118408, "rewards_train/2-2": 0.9904412627220154, "rewards_train/2-w": 1.1312379837036133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 2.5788766145706177, "rewards_train/margins_1": 0.5509016513824463, "rewards_train/margins_2": -0.1407967209815979, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -182.7981414794922, "logps_train/policy_1_l": -113.85545349121094, "logps_train/policy_1_w": -156.20321655273438, "logps_train/policy_2_2": -149.85862731933594, "logps_train/policy_2_w": -186.17254638671875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.627997636795044, "rewards_train/1-l": -0.7941387891769409, "rewards_train/1-w": 2.473428249359131, "rewards_train/2-2": 2.4625751972198486, "rewards_train/2-w": 1.7389953136444092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2675670385360718, "rewards_train/margins_1": 0.8454306125640869, "rewards_train/margins_2": 0.7235798835754395, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -116.03187561035156, "logps_train/policy_1_l": -161.93301391601562, "logps_train/policy_1_w": -127.089111328125, "logps_train/policy_2_2": -94.40986633300781, "logps_train/policy_2_w": -147.2645263671875, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.743687391281128, "rewards_train/1-l": -1.2444734573364258, "rewards_train/1-w": 2.0481209754943848, "rewards_train/2-2": 1.88479483127594, "rewards_train/2-w": 1.3712031841278076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2925944328308105, "rewards_train/margins_1": 0.30443358421325684, "rewards_train/margins_2": 0.5135916471481323, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -159.7410888671875, "logps_train/policy_1_l": -110.32197570800781, "logps_train/policy_1_w": -115.11824798583984, "logps_train/policy_2_2": -125.02706909179688, "logps_train/policy_2_w": -152.41079711914062, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.7430777549743652, "rewards_train/1-l": -0.4543166756629944, "rewards_train/1-w": 2.0334880352020264, "rewards_train/2-2": 2.1879186630249023, "rewards_train/2-w": 1.2940770387649536, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4878047108650208, "rewards_train/margins_1": 0.29041028022766113, "rewards_train/margins_2": 0.8938416242599487, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -190.2794952392578, "logps_train/policy_1_l": -157.68316650390625, "logps_train/policy_1_w": -95.35209655761719, "logps_train/policy_2_2": -150.8591766357422, "logps_train/policy_2_w": -115.43063354492188, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.1884580850601196, "rewards_train/1-l": -1.6862859725952148, "rewards_train/1-w": 1.8765089511871338, "rewards_train/2-2": 2.9031448364257812, "rewards_train/2-w": 1.555374264717102, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5627949237823486, "rewards_train/margins_1": 0.6880508661270142, "rewards_train/margins_2": 1.3477705717086792, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -142.00576782226562, "logps_train/policy_1_l": -139.89666748046875, "logps_train/policy_1_w": -94.21221923828125, "logps_train/policy_2_2": -115.82624816894531, "logps_train/policy_2_w": -123.61764526367188, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.3119230270385742, "rewards_train/1-l": -1.7584168910980225, "rewards_train/1-w": 1.8281919956207275, "rewards_train/2-2": 1.9892504215240479, "rewards_train/2-w": 1.0499536991119385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.58660888671875, "rewards_train/margins_1": 0.5162689685821533, "rewards_train/margins_2": 0.9392967224121094, "step": 181 }, { "epoch": 0.54, "learning_rate": 4.361163679997532e-06, "loss": 0.7308, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -147.09091186523438, "logps_train/policy_1_l": -111.1368408203125, "logps_train/policy_1_w": -129.8872528076172, "logps_train/policy_2_2": -123.86447143554688, "logps_train/policy_2_w": -147.9437255859375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.9393467903137207, "rewards_train/1-l": -1.038488745689392, "rewards_train/1-w": 1.7819781303405762, "rewards_train/2-2": 2.5479278564453125, "rewards_train/2-w": 1.0564088821411133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8204668760299683, "rewards_train/margins_1": -0.15736865997314453, "rewards_train/margins_2": 1.4915189743041992, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -103.90626525878906, "logps_train/policy_1_l": -94.99626159667969, "logps_train/policy_1_w": -59.68562316894531, "logps_train/policy_2_2": -82.45319366455078, "logps_train/policy_2_w": -82.98941040039062, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -69.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 1.339842438697815, "rewards_train/1-l": -0.5937670469284058, "rewards_train/1-w": 0.9587810635566711, "rewards_train/2-2": 1.9538993835449219, "rewards_train/2-w": 0.46629399061203003, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.552548110485077, "rewards_train/margins_1": -0.3810613751411438, "rewards_train/margins_2": 1.4876053929328918, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -188.7777862548828, "logps_train/policy_1_l": -208.46705627441406, "logps_train/policy_1_w": -159.20132446289062, "logps_train/policy_2_2": -132.24891662597656, "logps_train/policy_2_w": -227.93734741210938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.6722207069396973, "rewards_train/1-l": -1.371706485748291, "rewards_train/1-w": 3.7794766426086426, "rewards_train/2-2": 3.421982765197754, "rewards_train/2-w": 1.60978102684021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.151183128356934, "rewards_train/margins_1": 2.1072559356689453, "rewards_train/margins_2": 1.812201738357544, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -177.37586975097656, "logps_train/policy_1_l": -168.0347900390625, "logps_train/policy_1_w": -210.3975372314453, "logps_train/policy_2_2": -139.6318359375, "logps_train/policy_2_w": -263.0701904296875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 1.8971790075302124, "rewards_train/1-l": -1.454651117324829, "rewards_train/1-w": 3.047746181488037, "rewards_train/2-2": 2.6387691497802734, "rewards_train/2-w": 1.8492279052734375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.502397298812866, "rewards_train/margins_1": 1.1505671739578247, "rewards_train/margins_2": 0.7895412445068359, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -181.6083984375, "logps_train/policy_1_l": -193.76234436035156, "logps_train/policy_1_w": -130.2823944091797, "logps_train/policy_2_2": -139.15789794921875, "logps_train/policy_2_w": -173.96893310546875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.2000980377197266, "rewards_train/1-l": -1.8629534244537354, "rewards_train/1-w": 2.1858224868774414, "rewards_train/2-2": 2.3763985633850098, "rewards_train/2-w": 1.3421697616577148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.048775911331177, "rewards_train/margins_1": 0.9857244491577148, "rewards_train/margins_2": 1.034228801727295, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -247.51710510253906, "logps_train/policy_1_l": -189.69461059570312, "logps_train/policy_1_w": -171.4439697265625, "logps_train/policy_2_2": -207.01510620117188, "logps_train/policy_2_w": -204.98532104492188, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.438915729522705, "rewards_train/1-l": -0.9085242748260498, "rewards_train/1-w": 3.1556026935577393, "rewards_train/2-2": 2.474661350250244, "rewards_train/2-w": 2.12959361076355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.064126968383789, "rewards_train/margins_1": 1.7166869640350342, "rewards_train/margins_2": 0.34506773948669434, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -165.0509033203125, "logps_train/policy_1_l": -197.3243408203125, "logps_train/policy_1_w": -126.33041381835938, "logps_train/policy_2_2": -127.53816986083984, "logps_train/policy_2_w": -161.42430114746094, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.2374870777130127, "rewards_train/1-l": -2.474329948425293, "rewards_train/1-w": 1.7544587850570679, "rewards_train/2-2": 2.3344883918762207, "rewards_train/2-w": 0.9067883491516113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.228788733482361, "rewards_train/margins_1": 0.5169717073440552, "rewards_train/margins_2": 1.4277000427246094, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -203.666748046875, "logps_train/policy_1_l": -299.81610107421875, "logps_train/policy_1_w": -158.05621337890625, "logps_train/policy_2_2": -162.47262573242188, "logps_train/policy_2_w": -199.9615478515625, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.9083251953125, "rewards_train/1-l": -2.810516119003296, "rewards_train/1-w": 2.900629758834839, "rewards_train/2-2": 2.658987045288086, "rewards_train/2-w": 1.7350950241088867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.711145877838135, "rewards_train/margins_1": 0.9923045635223389, "rewards_train/margins_2": 0.9238920211791992, "step": 182 }, { "epoch": 0.55, "logps_train/policy_1_2": -247.37753295898438, "logps_train/policy_1_l": -189.49594116210938, "logps_train/policy_1_w": -165.36318969726562, "logps_train/policy_2_2": -195.50311279296875, "logps_train/policy_2_w": -199.62411499023438, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.487246036529541, "rewards_train/1-l": -1.180843710899353, "rewards_train/1-w": 1.804306149482727, "rewards_train/2-2": 4.030938148498535, "rewards_train/2-w": 1.2579017877578735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.98514986038208, "rewards_train/margins_1": -0.682939887046814, "rewards_train/margins_2": 2.7730363607406616, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -121.18701934814453, "logps_train/policy_1_l": -84.56344604492188, "logps_train/policy_1_w": -128.876953125, "logps_train/policy_2_2": -95.20948028564453, "logps_train/policy_2_w": -169.64987182617188, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.7094230651855469, "rewards_train/1-l": -0.8125951290130615, "rewards_train/1-w": 2.6708996295928955, "rewards_train/2-2": 2.4009270668029785, "rewards_train/2-w": 1.5221234560012817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.483494758605957, "rewards_train/margins_1": 0.9614765644073486, "rewards_train/margins_2": 0.8788036108016968, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -107.26847076416016, "logps_train/policy_1_l": -97.18037414550781, "logps_train/policy_1_w": -109.53977966308594, "logps_train/policy_2_2": -84.69953155517578, "logps_train/policy_2_w": -127.90867614746094, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.9083089232444763, "rewards_train/1-l": -0.45604971051216125, "rewards_train/1-w": 1.7944588661193848, "rewards_train/2-2": 1.4653985500335693, "rewards_train/2-w": 1.2810076475143433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.250508576631546, "rewards_train/margins_1": 0.8861499428749084, "rewards_train/margins_2": 0.18439090251922607, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -221.93289184570312, "logps_train/policy_1_l": -214.76834106445312, "logps_train/policy_1_w": -194.31539916992188, "logps_train/policy_2_2": -192.11752319335938, "logps_train/policy_2_w": -219.3217315673828, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 2.6317121982574463, "rewards_train/1-l": -1.1549593210220337, "rewards_train/1-w": 3.7559597492218018, "rewards_train/2-2": 3.450747489929199, "rewards_train/2-w": 3.055325984954834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.9109190702438354, "rewards_train/margins_1": 1.1242475509643555, "rewards_train/margins_2": 0.39542150497436523, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -109.88380432128906, "logps_train/policy_1_l": -108.9834976196289, "logps_train/policy_1_w": -111.37574005126953, "logps_train/policy_2_2": -88.69873809814453, "logps_train/policy_2_w": -141.32089233398438, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.329588770866394, "rewards_train/1-l": -0.6927834749221802, "rewards_train/1-w": 2.0376217365264893, "rewards_train/2-2": 1.6246576309204102, "rewards_train/2-w": 1.4417387247085571, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7304052114486694, "rewards_train/margins_1": 0.7080329656600952, "rewards_train/margins_2": 0.18291890621185303, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -195.522216796875, "logps_train/policy_1_l": -239.49468994140625, "logps_train/policy_1_w": -222.48141479492188, "logps_train/policy_2_2": -158.09164428710938, "logps_train/policy_2_w": -266.340576171875, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -292.0, "rewards_train/1-2": 2.0165295600891113, "rewards_train/1-l": -2.133842945098877, "rewards_train/1-w": 3.8737330436706543, "rewards_train/2-2": 3.1689610481262207, "rewards_train/2-w": 2.425316333770752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.007575988769531, "rewards_train/margins_1": 1.857203483581543, "rewards_train/margins_2": 0.7436447143554688, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -171.5828399658203, "logps_train/policy_1_l": -122.0240478515625, "logps_train/policy_1_w": -76.69131469726562, "logps_train/policy_2_2": -146.0399169921875, "logps_train/policy_2_w": -98.59522247314453, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 2.274529218673706, "rewards_train/1-l": -1.3049436807632446, "rewards_train/1-w": 1.634775161743164, "rewards_train/2-2": 3.2030386924743652, "rewards_train/2-w": 1.2295401096343994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9397188425064087, "rewards_train/margins_1": -0.639754056930542, "rewards_train/margins_2": 1.9734985828399658, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -164.7396697998047, "logps_train/policy_1_l": -172.39234924316406, "logps_train/policy_1_w": -201.7440185546875, "logps_train/policy_2_2": -137.40798950195312, "logps_train/policy_2_w": -236.91049194335938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -231.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.3197834491729736, "rewards_train/1-l": -1.298609972000122, "rewards_train/1-w": 2.8818492889404297, "rewards_train/2-2": 1.9115450382232666, "rewards_train/2-w": 1.3870769739151, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.180459260940552, "rewards_train/margins_1": 1.562065839767456, "rewards_train/margins_2": 0.5244680643081665, "step": 183 }, { "epoch": 0.55, "learning_rate": 4.344583197604319e-06, "loss": 0.6848, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -155.613525390625, "logps_train/policy_1_l": -145.69158935546875, "logps_train/policy_1_w": -82.40139770507812, "logps_train/policy_2_2": -124.10203552246094, "logps_train/policy_2_w": -102.11325073242188, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.5730235576629639, "rewards_train/1-l": -1.5531432628631592, "rewards_train/1-w": 2.1856417655944824, "rewards_train/2-2": 2.603858709335327, "rewards_train/2-w": 2.0574257373809814, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7387850284576416, "rewards_train/margins_1": 0.6126182079315186, "rewards_train/margins_2": 0.5464329719543457, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -144.99798583984375, "logps_train/policy_1_l": -122.39328002929688, "logps_train/policy_1_w": -114.77852630615234, "logps_train/policy_2_2": -117.99828338623047, "logps_train/policy_2_w": -150.189208984375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.0252000093460083, "rewards_train/1-l": -1.2805386781692505, "rewards_train/1-w": 2.8530073165893555, "rewards_train/2-2": 1.5944104194641113, "rewards_train/2-w": 1.7967052459716797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.133545994758606, "rewards_train/margins_1": 1.8278073072433472, "rewards_train/margins_2": -0.20229482650756836, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -128.5709686279297, "logps_train/policy_1_l": -160.4003448486328, "logps_train/policy_1_w": -149.31549072265625, "logps_train/policy_2_2": -104.22265625, "logps_train/policy_2_w": -171.84149169921875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.3257150650024414, "rewards_train/1-l": -0.9138626456260681, "rewards_train/1-w": 1.7020456790924072, "rewards_train/2-2": 1.8359382152557373, "rewards_train/2-w": 1.2939753532409668, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6159083247184753, "rewards_train/margins_1": 0.3763306140899658, "rewards_train/margins_2": 0.5419628620147705, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -120.30258178710938, "logps_train/policy_1_l": -118.71273040771484, "logps_train/policy_1_w": -144.42105102539062, "logps_train/policy_2_2": -98.26486206054688, "logps_train/policy_2_w": -174.0049591064453, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.0009914636611938, "rewards_train/1-l": -0.6853359937667847, "rewards_train/1-w": 2.3828935623168945, "rewards_train/2-2": 1.4282009601593018, "rewards_train/2-w": 1.6338789463043213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.068229556083679, "rewards_train/margins_1": 1.3819020986557007, "rewards_train/margins_2": -0.20567798614501953, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -102.75420379638672, "logps_train/policy_1_l": -105.57025909423828, "logps_train/policy_1_w": -77.55224609375, "logps_train/policy_2_2": -83.64369201660156, "logps_train/policy_2_w": -88.77389526367188, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.5533397197723389, "rewards_train/1-l": -0.7751898169517517, "rewards_train/1-w": 0.805565595626831, "rewards_train/2-2": 1.9004507064819336, "rewards_train/2-w": 0.6186802387237549, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.5807554125785828, "rewards_train/margins_1": -0.7477741241455078, "rewards_train/margins_2": 1.2817704677581787, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -175.04205322265625, "logps_train/policy_1_l": -150.9844970703125, "logps_train/policy_1_w": -176.00009155273438, "logps_train/policy_2_2": -138.91378784179688, "logps_train/policy_2_w": -208.2139434814453, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.2500420808792114, "rewards_train/1-l": -1.417639970779419, "rewards_train/1-w": 2.994863748550415, "rewards_train/2-2": 2.2486112117767334, "rewards_train/2-w": 1.9623454809188843, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.412503719329834, "rewards_train/margins_1": 1.7448216676712036, "rewards_train/margins_2": 0.2862657308578491, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -126.43191528320312, "logps_train/policy_1_l": -75.01261901855469, "logps_train/policy_1_w": -71.00021362304688, "logps_train/policy_2_2": -93.90201568603516, "logps_train/policy_2_w": -91.79666900634766, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.2247779369354248, "rewards_train/1-l": -0.6403238773345947, "rewards_train/1-w": 0.7984163761138916, "rewards_train/2-2": 1.8836265802383423, "rewards_train/2-w": 0.5414265394210815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.4387402534484863, "rewards_train/margins_1": -0.4263615608215332, "rewards_train/margins_2": 1.3422000408172607, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -157.99061584472656, "logps_train/policy_1_l": -191.5321044921875, "logps_train/policy_1_w": -94.07170104980469, "logps_train/policy_2_2": -127.65103149414062, "logps_train/policy_2_w": -113.15843200683594, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.5884373188018799, "rewards_train/1-l": -2.137194871902466, "rewards_train/1-w": 1.193221092224121, "rewards_train/2-2": 2.1598970890045166, "rewards_train/2-w": 0.9521250128746033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.330415964126587, "rewards_train/margins_1": -0.3952162265777588, "rewards_train/margins_2": 1.2077720761299133, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -161.56715393066406, "logps_train/policy_1_l": -265.71142578125, "logps_train/policy_1_w": -189.63958740234375, "logps_train/policy_2_2": -134.10226440429688, "logps_train/policy_2_w": -224.76425170898438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 2.104222536087036, "rewards_train/1-l": -1.6414566040039062, "rewards_train/1-w": 3.431744337081909, "rewards_train/2-2": 2.293679714202881, "rewards_train/2-w": 2.8009181022644043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.073200941085815, "rewards_train/margins_1": 1.327521800994873, "rewards_train/margins_2": -0.5072383880615234, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -146.47393798828125, "logps_train/policy_1_l": -173.59861755371094, "logps_train/policy_1_w": -139.24859619140625, "logps_train/policy_2_2": -115.26010131835938, "logps_train/policy_2_w": -164.4030303955078, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.4307315349578857, "rewards_train/1-l": -1.2534159421920776, "rewards_train/1-w": 2.343109369277954, "rewards_train/2-2": 2.2271153926849365, "rewards_train/2-w": 1.5106741189956665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5965253114700317, "rewards_train/margins_1": 0.9123778343200684, "rewards_train/margins_2": 0.71644127368927, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -109.9035415649414, "logps_train/policy_1_l": -182.4771728515625, "logps_train/policy_1_w": -110.48408508300781, "logps_train/policy_2_2": -78.33407592773438, "logps_train/policy_2_w": -139.83663940429688, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.6002711057662964, "rewards_train/1-l": -2.0011842250823975, "rewards_train/1-w": 2.2875289916992188, "rewards_train/2-2": 2.279092788696289, "rewards_train/2-w": 1.5788356065750122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.288713216781616, "rewards_train/margins_1": 0.6872578859329224, "rewards_train/margins_2": 0.7002571821212769, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -121.88919067382812, "logps_train/policy_1_l": -95.32075500488281, "logps_train/policy_1_w": -95.78048706054688, "logps_train/policy_2_2": -99.99105072021484, "logps_train/policy_2_w": -119.12265014648438, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -90.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.1868621110916138, "rewards_train/1-l": -0.49809110164642334, "rewards_train/1-w": 1.8492950201034546, "rewards_train/2-2": 1.735269546508789, "rewards_train/2-w": 1.0037511587142944, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.347386121749878, "rewards_train/margins_1": 0.6624329090118408, "rewards_train/margins_2": 0.7315183877944946, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -157.11094665527344, "logps_train/policy_1_l": -156.1564178466797, "logps_train/policy_1_w": -92.96070861816406, "logps_train/policy_2_2": -119.76988220214844, "logps_train/policy_2_w": -120.1943588256836, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.336561918258667, "rewards_train/1-l": -1.2192556858062744, "rewards_train/1-w": 1.7242424488067627, "rewards_train/2-2": 2.225355625152588, "rewards_train/2-w": 1.5868141651153564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.943498134613037, "rewards_train/margins_1": 0.3876805305480957, "rewards_train/margins_2": 0.6385414600372314, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -207.15902709960938, "logps_train/policy_1_l": -125.03805541992188, "logps_train/policy_1_w": -134.15444946289062, "logps_train/policy_2_2": -168.06976318359375, "logps_train/policy_2_w": -195.89822387695312, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.159097194671631, "rewards_train/1-l": -0.7760714292526245, "rewards_train/1-w": 2.394223213195801, "rewards_train/2-2": 3.6430249214172363, "rewards_train/2-w": 1.1413302421569824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1702946424484253, "rewards_train/margins_1": 0.23512601852416992, "rewards_train/margins_2": 2.501694679260254, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -226.06826782226562, "logps_train/policy_1_l": -243.58819580078125, "logps_train/policy_1_w": -178.495361328125, "logps_train/policy_2_2": -183.91835021972656, "logps_train/policy_2_w": -211.1389923095703, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -225.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.9087984561920166, "rewards_train/1-l": -0.5916329622268677, "rewards_train/1-w": 2.5699946880340576, "rewards_train/2-2": 4.11754035949707, "rewards_train/2-w": 1.8728199005126953, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1616276502609253, "rewards_train/margins_1": -0.338803768157959, "rewards_train/margins_2": 2.244720458984375, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -92.87374114990234, "logps_train/policy_1_l": -86.96562194824219, "logps_train/policy_1_w": -87.32139587402344, "logps_train/policy_2_2": -69.92639923095703, "logps_train/policy_2_w": -109.49734497070312, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.4360637664794922, "rewards_train/1-l": -0.5630800724029541, "rewards_train/1-w": 1.6017334461212158, "rewards_train/2-2": 1.6354848146438599, "rewards_train/2-w": 0.7582371234893799, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.16481351852417, "rewards_train/margins_1": 0.16566967964172363, "rewards_train/margins_2": 0.87724769115448, "step": 185 }, { "epoch": 0.56, "learning_rate": 4.327822687142818e-06, "loss": 0.8713, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -161.7005615234375, "logps_train/policy_1_l": -188.27395629882812, "logps_train/policy_1_w": -171.59750366210938, "logps_train/policy_2_2": -135.61328125, "logps_train/policy_2_w": -209.15042114257812, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.9080686569213867, "rewards_train/1-l": -1.6809113025665283, "rewards_train/1-w": 3.0199360847473145, "rewards_train/2-2": 2.3355460166931152, "rewards_train/2-w": 2.109957695007324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.700847387313843, "rewards_train/margins_1": 1.1118674278259277, "rewards_train/margins_2": 0.22558832168579102, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -142.29217529296875, "logps_train/policy_1_l": -172.6532745361328, "logps_train/policy_1_w": -143.27011108398438, "logps_train/policy_2_2": -119.95578002929688, "logps_train/policy_2_w": -174.35646057128906, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.6426565647125244, "rewards_train/1-l": -1.797749400138855, "rewards_train/1-w": 2.770254611968994, "rewards_train/2-2": 2.3512983322143555, "rewards_train/2-w": 1.5795884132385254, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.568004012107849, "rewards_train/margins_1": 1.1275980472564697, "rewards_train/margins_2": 0.7717099189758301, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -132.30068969726562, "logps_train/policy_1_l": -150.56048583984375, "logps_train/policy_1_w": -130.25587463378906, "logps_train/policy_2_2": -114.70811462402344, "logps_train/policy_2_w": -155.03736877441406, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.9496173858642578, "rewards_train/1-l": -1.8049010038375854, "rewards_train/1-w": 2.957226037979126, "rewards_train/2-2": 2.112001419067383, "rewards_train/2-w": 2.4087634086608887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.762127041816711, "rewards_train/margins_1": 1.0076086521148682, "rewards_train/margins_2": -0.29676198959350586, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -109.44989776611328, "logps_train/policy_1_l": -91.65189361572266, "logps_train/policy_1_w": -66.25558471679688, "logps_train/policy_2_2": -86.7247543334961, "logps_train/policy_2_w": -86.26542663574219, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.8565719723701477, "rewards_train/1-l": -1.123685359954834, "rewards_train/1-w": 1.1092069149017334, "rewards_train/2-2": 1.784360647201538, "rewards_train/2-w": 0.8756053447723389, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2328922748565674, "rewards_train/margins_1": 0.2526349425315857, "rewards_train/margins_2": 0.9087553024291992, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -121.723388671875, "logps_train/policy_1_l": -106.13323974609375, "logps_train/policy_1_w": -94.0186767578125, "logps_train/policy_2_2": -108.82307434082031, "logps_train/policy_2_w": -110.5537109375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.8839110136032104, "rewards_train/1-l": -0.5610779523849487, "rewards_train/1-w": 1.177038311958313, "rewards_train/2-2": 2.1208176612854004, "rewards_train/2-w": 0.72900390625, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7381162643432617, "rewards_train/margins_1": -0.7068727016448975, "rewards_train/margins_2": 1.3918137550354004, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -203.94338989257812, "logps_train/policy_1_l": -210.8861083984375, "logps_train/policy_1_w": -212.24798583984375, "logps_train/policy_2_2": -158.9848175048828, "logps_train/policy_2_w": -253.8717041015625, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.7306606769561768, "rewards_train/1-l": -2.2249393463134766, "rewards_train/1-w": 3.8595762252807617, "rewards_train/2-2": 3.346830368041992, "rewards_train/2-w": 2.7081408500671387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.084515571594238, "rewards_train/margins_1": 2.128915548324585, "rewards_train/margins_2": 0.6386895179748535, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -236.2397918701172, "logps_train/policy_1_l": -258.3691711425781, "logps_train/policy_1_w": -160.408935546875, "logps_train/policy_2_2": -178.57603454589844, "logps_train/policy_2_w": -196.77911376953125, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.893207311630249, "rewards_train/1-l": -2.8142600059509277, "rewards_train/1-w": 2.6480722427368164, "rewards_train/2-2": 3.984584331512451, "rewards_train/2-w": 2.047868251800537, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.462332248687744, "rewards_train/margins_1": 0.7548649311065674, "rewards_train/margins_2": 1.936716079711914, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -227.243408203125, "logps_train/policy_1_l": -221.64576721191406, "logps_train/policy_1_w": -184.20306396484375, "logps_train/policy_2_2": -188.76998901367188, "logps_train/policy_2_w": -214.668212890625, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.013939380645752, "rewards_train/1-l": -1.694777250289917, "rewards_train/1-w": 3.67500638961792, "rewards_train/2-2": 3.0448760986328125, "rewards_train/2-w": 2.8503646850585938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.369783639907837, "rewards_train/margins_1": 1.661067008972168, "rewards_train/margins_2": 0.19451141357421875, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -237.04129028320312, "logps_train/policy_1_l": -235.25022888183594, "logps_train/policy_1_w": -175.7890625, "logps_train/policy_2_2": -182.14028930664062, "logps_train/policy_2_w": -239.84188842773438, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.0771214962005615, "rewards_train/1-l": -1.3980703353881836, "rewards_train/1-w": 3.7898433208465576, "rewards_train/2-2": 3.4047203063964844, "rewards_train/2-w": 2.603311777114868, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.187913656234741, "rewards_train/margins_1": 1.712721824645996, "rewards_train/margins_2": 0.8014085292816162, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -180.49844360351562, "logps_train/policy_1_l": -171.15383911132812, "logps_train/policy_1_w": -91.05876159667969, "logps_train/policy_2_2": -144.52691650390625, "logps_train/policy_2_w": -112.47496032714844, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.1735936403274536, "rewards_train/1-l": -1.4770530462265015, "rewards_train/1-w": 1.705061912536621, "rewards_train/2-2": 2.36195707321167, "rewards_train/2-w": 1.4665660858154297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1821149587631226, "rewards_train/margins_1": 0.5314682722091675, "rewards_train/margins_2": 0.8953909873962402, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -163.0885772705078, "logps_train/policy_1_l": -162.44642639160156, "logps_train/policy_1_w": -152.34278869628906, "logps_train/policy_2_2": -137.7022705078125, "logps_train/policy_2_w": -197.3952178955078, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.2739553451538086, "rewards_train/1-l": -1.5475711822509766, "rewards_train/1-w": 1.9250962734222412, "rewards_train/2-2": 1.9754762649536133, "rewards_train/2-w": 1.1307905912399292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4726674556732178, "rewards_train/margins_1": 0.6511409282684326, "rewards_train/margins_2": 0.8446856737136841, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -211.87722778320312, "logps_train/policy_1_l": -179.5014190673828, "logps_train/policy_1_w": -165.1951446533203, "logps_train/policy_2_2": -185.941650390625, "logps_train/policy_2_w": -187.8682861328125, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.437276840209961, "rewards_train/1-l": -1.204439401626587, "rewards_train/1-w": 3.122575521469116, "rewards_train/2-2": 2.929272174835205, "rewards_train/2-w": 2.7988171577453613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.327014923095703, "rewards_train/margins_1": 0.6852986812591553, "rewards_train/margins_2": 0.13045501708984375, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -131.77606201171875, "logps_train/policy_1_l": -221.078857421875, "logps_train/policy_1_w": -118.01496887207031, "logps_train/policy_2_2": -111.41215515136719, "logps_train/policy_2_w": -136.9385986328125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.2848939895629883, "rewards_train/1-l": -1.718823790550232, "rewards_train/1-w": 2.854069471359253, "rewards_train/2-2": 1.774409532546997, "rewards_train/2-w": 2.421814203262329, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.572893261909485, "rewards_train/margins_1": 1.5691754817962646, "rewards_train/margins_2": -0.647404670715332, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -156.52760314941406, "logps_train/policy_1_l": -139.88941955566406, "logps_train/policy_1_w": -92.35275268554688, "logps_train/policy_2_2": -119.41028594970703, "logps_train/policy_2_w": -116.36914825439453, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.373801827430725, "rewards_train/1-l": -1.9828872680664062, "rewards_train/1-w": 1.9674599170684814, "rewards_train/2-2": 2.421471118927002, "rewards_train/2-w": 1.339646816253662, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9503471851348877, "rewards_train/margins_1": 0.5936580896377563, "rewards_train/margins_2": 1.0818243026733398, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -227.27005004882812, "logps_train/policy_1_l": -183.1260223388672, "logps_train/policy_1_w": -130.80239868164062, "logps_train/policy_2_2": -184.89495849609375, "logps_train/policy_2_w": -159.1461181640625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.3386213779449463, "rewards_train/1-l": -1.4333909749984741, "rewards_train/1-w": 2.6017918586730957, "rewards_train/2-2": 2.136286735534668, "rewards_train/2-w": 1.5603880882263184, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.03518283367157, "rewards_train/margins_1": 1.2631704807281494, "rewards_train/margins_2": 0.5758986473083496, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -147.42886352539062, "logps_train/policy_1_l": -211.98611450195312, "logps_train/policy_1_w": -138.74282836914062, "logps_train/policy_2_2": -118.98272705078125, "logps_train/policy_2_w": -176.84088134765625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.4508649110794067, "rewards_train/1-l": -2.2923622131347656, "rewards_train/1-w": 2.2038421630859375, "rewards_train/2-2": 1.7579772472381592, "rewards_train/2-w": 1.4096622467041016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.496204376220703, "rewards_train/margins_1": 0.7529772520065308, "rewards_train/margins_2": 0.3483150005340576, "step": 187 }, { "epoch": 0.56, "learning_rate": 4.310883784409307e-06, "loss": 0.7365, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -121.19791412353516, "logps_train/policy_1_l": -160.81857299804688, "logps_train/policy_1_w": -102.77503967285156, "logps_train/policy_2_2": -86.49061584472656, "logps_train/policy_2_w": -138.50830078125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.7223963737487793, "rewards_train/1-l": -1.3049044609069824, "rewards_train/1-w": 1.7873395681381226, "rewards_train/2-2": 2.2353131771087646, "rewards_train/2-w": 1.1655762195587158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.092244029045105, "rewards_train/margins_1": 0.06494319438934326, "rewards_train/margins_2": 1.0697369575500488, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -193.41024780273438, "logps_train/policy_1_l": -216.56712341308594, "logps_train/policy_1_w": -110.63774871826172, "logps_train/policy_2_2": -152.94371032714844, "logps_train/policy_2_w": -136.3104248046875, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.1620988845825195, "rewards_train/1-l": -1.1565165519714355, "rewards_train/1-w": 1.2268496751785278, "rewards_train/2-2": 2.157972812652588, "rewards_train/2-w": 0.8400505781173706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3833662271499634, "rewards_train/margins_1": 0.0647507905960083, "rewards_train/margins_2": 1.3179222345352173, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -240.34263610839844, "logps_train/policy_1_l": -152.38430786132812, "logps_train/policy_1_w": -170.03805541992188, "logps_train/policy_2_2": -205.972412109375, "logps_train/policy_2_w": -195.26498413085938, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.1247191429138184, "rewards_train/1-l": -0.6500039100646973, "rewards_train/1-w": 2.8779828548431396, "rewards_train/2-2": 2.405005693435669, "rewards_train/2-w": 2.203190803527832, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.527986764907837, "rewards_train/margins_1": 1.7532637119293213, "rewards_train/margins_2": 0.20181488990783691, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -160.95213317871094, "logps_train/policy_1_l": -149.46368408203125, "logps_train/policy_1_w": -71.42056274414062, "logps_train/policy_2_2": -127.01891326904297, "logps_train/policy_2_w": -106.31639099121094, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 1.3000991344451904, "rewards_train/1-l": -1.3143367767333984, "rewards_train/1-w": 1.7995452880859375, "rewards_train/2-2": 2.2942018508911133, "rewards_train/2-w": 1.2148457765579224, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.113882064819336, "rewards_train/margins_1": 0.49944615364074707, "rewards_train/margins_2": 1.079356074333191, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -188.20974731445312, "logps_train/policy_1_l": -131.83282470703125, "logps_train/policy_1_w": -119.62816619873047, "logps_train/policy_2_2": -155.09616088867188, "logps_train/policy_2_w": -149.0205535888672, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.7977750301361084, "rewards_train/1-l": -0.7078923583030701, "rewards_train/1-w": 2.1149182319641113, "rewards_train/2-2": 2.5903830528259277, "rewards_train/2-w": 1.7729442119598389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8228105902671814, "rewards_train/margins_1": 0.31714320182800293, "rewards_train/margins_2": 0.8174388408660889, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -112.92464447021484, "logps_train/policy_1_l": -157.7500762939453, "logps_train/policy_1_w": -99.03377532958984, "logps_train/policy_2_2": -96.91934204101562, "logps_train/policy_2_w": -117.62765502929688, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.2630044221878052, "rewards_train/1-l": -2.0270586013793945, "rewards_train/1-w": 1.747794508934021, "rewards_train/2-2": 1.7320891618728638, "rewards_train/2-w": 1.118484377861023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7748531103134155, "rewards_train/margins_1": 0.4847900867462158, "rewards_train/margins_2": 0.6136047840118408, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -147.31275939941406, "logps_train/policy_1_l": -165.13247680664062, "logps_train/policy_1_w": -90.77393341064453, "logps_train/policy_2_2": -107.66642761230469, "logps_train/policy_2_w": -122.44213104248047, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.2655985355377197, "rewards_train/1-l": -1.1718416213989258, "rewards_train/1-w": 1.4538564682006836, "rewards_train/2-2": 1.8724197149276733, "rewards_train/2-w": 0.854224681854248, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6256980895996094, "rewards_train/margins_1": 0.18825793266296387, "rewards_train/margins_2": 1.0181950330734253, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -179.95831298828125, "logps_train/policy_1_l": -195.09812927246094, "logps_train/policy_1_w": -164.85629272460938, "logps_train/policy_2_2": -143.1949920654297, "logps_train/policy_2_w": -214.1050567626953, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": 1.2045605182647705, "rewards_train/1-l": -1.8265111446380615, "rewards_train/1-w": 2.683121681213379, "rewards_train/2-2": 2.0140938758850098, "rewards_train/2-w": 1.2973068952560425, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.50963282585144, "rewards_train/margins_1": 1.4785611629486084, "rewards_train/margins_2": 0.7167869806289673, "step": 188 }, { "epoch": 0.57, "logps_train/policy_1_2": -93.01333618164062, "logps_train/policy_1_l": -104.25723266601562, "logps_train/policy_1_w": -99.11640167236328, "logps_train/policy_2_2": -73.24149322509766, "logps_train/policy_2_w": -125.46842193603516, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.3642914295196533, "rewards_train/1-l": -1.3280673027038574, "rewards_train/1-w": 2.327422618865967, "rewards_train/2-2": 1.721163034439087, "rewards_train/2-w": 1.4672203063964844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.655489921569824, "rewards_train/margins_1": 0.9631311893463135, "rewards_train/margins_2": 0.25394272804260254, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -164.26214599609375, "logps_train/policy_1_l": -139.9580535888672, "logps_train/policy_1_w": -120.19664001464844, "logps_train/policy_2_2": -135.66775512695312, "logps_train/policy_2_w": -141.61245727539062, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 2.067535877227783, "rewards_train/1-l": -0.6692425608634949, "rewards_train/1-w": 1.7506487369537354, "rewards_train/2-2": 2.555100440979004, "rewards_train/2-w": 1.0825035572052002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4198912978172302, "rewards_train/margins_1": -0.31688714027404785, "rewards_train/margins_2": 1.4725968837738037, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -60.381874084472656, "logps_train/policy_1_l": -108.65612030029297, "logps_train/policy_1_w": -63.970703125, "logps_train/policy_2_2": -50.16545867919922, "logps_train/policy_2_w": -81.8424301147461, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.775484323501587, "rewards_train/1-l": -1.4398305416107178, "rewards_train/1-w": 2.2201175689697266, "rewards_train/2-2": 1.7378487586975098, "rewards_train/2-w": 1.6196630001068115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6599481105804443, "rewards_train/margins_1": 0.44463324546813965, "rewards_train/margins_2": 0.11818575859069824, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -228.59237670898438, "logps_train/policy_1_l": -160.54644775390625, "logps_train/policy_1_w": -201.96710205078125, "logps_train/policy_2_2": -186.36585998535156, "logps_train/policy_2_w": -239.15142822265625, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.8681062459945679, "rewards_train/1-l": -1.3373584747314453, "rewards_train/1-w": 2.9626643657684326, "rewards_train/2-2": 3.2077503204345703, "rewards_train/2-w": 1.5270447731018066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.300022840499878, "rewards_train/margins_1": 1.0945581197738647, "rewards_train/margins_2": 1.6807055473327637, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -219.9979248046875, "logps_train/policy_1_l": -201.86138916015625, "logps_train/policy_1_w": -166.6822052001953, "logps_train/policy_2_2": -192.76638793945312, "logps_train/policy_2_w": -195.299560546875, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.798645257949829, "rewards_train/1-l": -1.9048880338668823, "rewards_train/1-w": 2.4192795753479004, "rewards_train/2-2": 2.521799087524414, "rewards_train/2-w": 1.8294193744659424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.324167609214783, "rewards_train/margins_1": 0.6206343173980713, "rewards_train/margins_2": 0.6923797130584717, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -103.97476196289062, "logps_train/policy_1_l": -83.9677963256836, "logps_train/policy_1_w": -89.89250183105469, "logps_train/policy_2_2": -87.42877197265625, "logps_train/policy_2_w": -105.6222915649414, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -78.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.5298670530319214, "rewards_train/1-l": -0.5663108825683594, "rewards_train/1-w": 1.5146565437316895, "rewards_train/2-2": 1.7606379985809326, "rewards_train/2-w": 1.0709738731384277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.080967426300049, "rewards_train/margins_1": -0.015210509300231934, "rewards_train/margins_2": 0.6896641254425049, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -131.39846801757812, "logps_train/policy_1_l": -99.13711547851562, "logps_train/policy_1_w": -107.2514877319336, "logps_train/policy_2_2": -110.33929443359375, "logps_train/policy_2_w": -131.05120849609375, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.72890305519104, "rewards_train/1-l": -0.6078519225120544, "rewards_train/1-w": 1.6029764413833618, "rewards_train/2-2": 2.12544584274292, "rewards_train/2-w": 1.2183175086975098, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2108283638954163, "rewards_train/margins_1": -0.12592661380767822, "rewards_train/margins_2": 0.9071283340454102, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -106.96464538574219, "logps_train/policy_1_l": -97.950439453125, "logps_train/policy_1_w": -78.90914916992188, "logps_train/policy_2_2": -83.16268920898438, "logps_train/policy_2_w": -100.81855010986328, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 0.49728602170944214, "rewards_train/1-l": -0.8387942314147949, "rewards_train/1-w": 1.5276399850845337, "rewards_train/2-2": 0.9235755205154419, "rewards_train/2-w": 1.145684003829956, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3664342164993286, "rewards_train/margins_1": 1.0303539633750916, "rewards_train/margins_2": -0.22210848331451416, "step": 189 }, { "epoch": 0.57, "learning_rate": 4.293768142610828e-06, "loss": 0.8262, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -99.74652099609375, "logps_train/policy_1_l": -56.94672393798828, "logps_train/policy_1_w": -65.77855682373047, "logps_train/policy_2_2": -83.79356384277344, "logps_train/policy_2_w": -80.1520004272461, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -48.75, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.8378480076789856, "rewards_train/1-l": -0.8189889192581177, "rewards_train/1-w": 0.905738115310669, "rewards_train/2-2": 1.011659026145935, "rewards_train/2-w": 0.5635116696357727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7247270345687866, "rewards_train/margins_1": 0.06789010763168335, "rewards_train/margins_2": 0.44814735651016235, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -84.23180389404297, "logps_train/policy_1_l": -62.167354583740234, "logps_train/policy_1_w": -84.5728988647461, "logps_train/policy_2_2": -71.89031982421875, "logps_train/policy_2_w": -99.17668914794922, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -60.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.787757158279419, "rewards_train/1-l": -0.2253292351961136, "rewards_train/1-w": 1.652085304260254, "rewards_train/2-2": 2.0176093578338623, "rewards_train/2-w": 1.3510816097259521, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8774145394563675, "rewards_train/margins_1": -0.13567185401916504, "rewards_train/margins_2": 0.6665277481079102, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -127.04936218261719, "logps_train/policy_1_l": -127.78927612304688, "logps_train/policy_1_w": -111.16858673095703, "logps_train/policy_2_2": -104.05952453613281, "logps_train/policy_2_w": -131.7855682373047, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.1185024976730347, "rewards_train/1-l": -1.813449501991272, "rewards_train/1-w": 1.998766303062439, "rewards_train/2-2": 1.5143603086471558, "rewards_train/2-w": 1.3370682001113892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.812215805053711, "rewards_train/margins_1": 0.8802638053894043, "rewards_train/margins_2": 0.1772921085357666, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -192.95201110839844, "logps_train/policy_1_l": -164.77822875976562, "logps_train/policy_1_w": -139.78111267089844, "logps_train/policy_2_2": -153.44677734375, "logps_train/policy_2_w": -172.90643310546875, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.8227678537368774, "rewards_train/1-l": -1.8189365863800049, "rewards_train/1-w": 2.0523571968078613, "rewards_train/2-2": 2.8695805072784424, "rewards_train/2-w": 0.9671696424484253, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.871293783187866, "rewards_train/margins_1": 0.2295893430709839, "rewards_train/margins_2": 1.902410864830017, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -121.70602416992188, "logps_train/policy_1_l": -167.50991821289062, "logps_train/policy_1_w": -131.43588256835938, "logps_train/policy_2_2": -113.88656616210938, "logps_train/policy_2_w": -149.15365600585938, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4493197202682495, "rewards_train/1-l": -1.237906813621521, "rewards_train/1-w": 1.4673500061035156, "rewards_train/2-2": 1.3949373960494995, "rewards_train/2-w": 1.0815099477767944, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7052568197250366, "rewards_train/margins_1": 0.018030285835266113, "rewards_train/margins_2": 0.3134274482727051, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -133.63380432128906, "logps_train/policy_1_l": -178.5126953125, "logps_train/policy_1_w": -121.90760803222656, "logps_train/policy_2_2": -111.62394714355469, "logps_train/policy_2_w": -148.00839233398438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.5413072109222412, "rewards_train/1-l": -1.8754873275756836, "rewards_train/1-w": 1.2100211381912231, "rewards_train/2-2": 2.0954177379608154, "rewards_train/2-w": 0.7647863626480103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0855084657669067, "rewards_train/margins_1": -0.33128607273101807, "rewards_train/margins_2": 1.3306313753128052, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -142.83889770507812, "logps_train/policy_1_l": -161.36819458007812, "logps_train/policy_1_w": -124.09526824951172, "logps_train/policy_2_2": -112.60054016113281, "logps_train/policy_2_w": -160.43881225585938, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.7567355632781982, "rewards_train/1-l": -1.967288851737976, "rewards_train/1-w": 2.1717238426208496, "rewards_train/2-2": 2.0680713653564453, "rewards_train/2-w": 1.1233067512512207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.139012694358826, "rewards_train/margins_1": 0.41498827934265137, "rewards_train/margins_2": 0.9447646141052246, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -208.05477905273438, "logps_train/policy_1_l": -156.15634155273438, "logps_train/policy_1_w": -156.4095916748047, "logps_train/policy_2_2": -175.7252655029297, "logps_train/policy_2_w": -193.51089477539062, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6913979053497314, "rewards_train/1-l": -1.226766586303711, "rewards_train/1-w": 2.4746651649475098, "rewards_train/2-2": 2.496222496032715, "rewards_train/2-w": 1.6864118576049805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7014317512512207, "rewards_train/margins_1": 0.7832672595977783, "rewards_train/margins_2": 0.8098106384277344, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -158.9025421142578, "logps_train/policy_1_l": -200.70001220703125, "logps_train/policy_1_w": -196.16204833984375, "logps_train/policy_2_2": -132.47964477539062, "logps_train/policy_2_w": -225.6671142578125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.069901943206787, "rewards_train/1-l": -2.165314197540283, "rewards_train/1-w": 2.0449156761169434, "rewards_train/2-2": 1.8368014097213745, "rewards_train/2-w": 1.0600461959838867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.210229873657227, "rewards_train/margins_1": 0.9750137329101562, "rewards_train/margins_2": 0.7767552137374878, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -139.9027099609375, "logps_train/policy_1_l": -136.28790283203125, "logps_train/policy_1_w": -114.17993927001953, "logps_train/policy_2_2": -114.15396881103516, "logps_train/policy_2_w": -147.73556518554688, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.9099721908569336, "rewards_train/1-l": -2.074249505996704, "rewards_train/1-w": 2.3195061683654785, "rewards_train/2-2": 1.6427083015441895, "rewards_train/2-w": 1.6846468448638916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.393755674362183, "rewards_train/margins_1": 1.409533977508545, "rewards_train/margins_2": -0.04193854331970215, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -97.57633209228516, "logps_train/policy_1_l": -53.79306411743164, "logps_train/policy_1_w": -40.71607208251953, "logps_train/policy_2_2": -72.72333526611328, "logps_train/policy_2_w": -49.57960891723633, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -48.5, "logps_train/ref_1_w": -47.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 0.7751796841621399, "rewards_train/1-l": -0.5192477703094482, "rewards_train/1-w": 0.6577870845794678, "rewards_train/2-2": 1.3163379430770874, "rewards_train/2-w": 0.5172345638275146, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.177034854888916, "rewards_train/margins_1": -0.11739259958267212, "rewards_train/margins_2": 0.7991033792495728, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -188.85385131835938, "logps_train/policy_1_l": -215.72518920898438, "logps_train/policy_1_w": -163.5721435546875, "logps_train/policy_2_2": -149.520263671875, "logps_train/policy_2_w": -206.1148681640625, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 1.423600196838379, "rewards_train/1-l": -2.4381442070007324, "rewards_train/1-w": 2.2089977264404297, "rewards_train/2-2": 2.3288331031799316, "rewards_train/2-w": 1.6978895664215088, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.647141933441162, "rewards_train/margins_1": 0.7853975296020508, "rewards_train/margins_2": 0.6309435367584229, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -160.3531494140625, "logps_train/policy_1_l": -145.7583770751953, "logps_train/policy_1_w": -121.205322265625, "logps_train/policy_2_2": -131.04354858398438, "logps_train/policy_2_w": -157.83401489257812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.1029670238494873, "rewards_train/1-l": -1.1701741218566895, "rewards_train/1-w": 1.8452880382537842, "rewards_train/2-2": 1.5972081422805786, "rewards_train/2-w": 1.1933557987213135, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0154621601104736, "rewards_train/margins_1": 0.7423210144042969, "rewards_train/margins_2": 0.40385234355926514, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -171.134765625, "logps_train/policy_1_l": -85.48480987548828, "logps_train/policy_1_w": -73.75640106201172, "logps_train/policy_2_2": -145.18191528320312, "logps_train/policy_2_w": -92.9541015625, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.5333991050720215, "rewards_train/1-l": -0.7660595178604126, "rewards_train/1-w": 1.3552196025848389, "rewards_train/2-2": 2.117746591567993, "rewards_train/2-w": 1.071777105331421, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1212791204452515, "rewards_train/margins_1": -0.17817950248718262, "rewards_train/margins_2": 1.0459694862365723, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -209.8477783203125, "logps_train/policy_1_l": -283.17236328125, "logps_train/policy_1_w": -156.15213012695312, "logps_train/policy_2_2": -179.68052673339844, "logps_train/policy_2_w": -187.95887756347656, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -256.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.4589715003967285, "rewards_train/1-l": -2.629735231399536, "rewards_train/1-w": 2.7097859382629395, "rewards_train/2-2": 2.325697422027588, "rewards_train/2-w": 1.9666112661361694, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.339521169662476, "rewards_train/margins_1": 1.250814437866211, "rewards_train/margins_2": 0.35908615589141846, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -138.73190307617188, "logps_train/policy_1_l": -146.31680297851562, "logps_train/policy_1_w": -99.63301086425781, "logps_train/policy_2_2": -109.67599487304688, "logps_train/policy_2_w": -118.58628845214844, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.468217134475708, "rewards_train/1-l": -1.0414448976516724, "rewards_train/1-w": 1.863261103630066, "rewards_train/2-2": 2.013650417327881, "rewards_train/2-w": 1.2132458686828613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9047060012817383, "rewards_train/margins_1": 0.3950439691543579, "rewards_train/margins_2": 0.8004045486450195, "step": 191 }, { "epoch": 0.57, "learning_rate": 4.276477432203849e-06, "loss": 0.8754, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -110.17860412597656, "logps_train/policy_1_l": -47.968807220458984, "logps_train/policy_1_w": -58.35185241699219, "logps_train/policy_2_2": -86.00685119628906, "logps_train/policy_2_w": -83.00163269042969, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -44.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -92.5, "rewards_train/1-2": 0.9204214811325073, "rewards_train/1-l": -0.39961516857147217, "rewards_train/1-w": 1.3296586275100708, "rewards_train/2-2": 1.3961904048919678, "rewards_train/2-w": 0.9678048491477966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.729273796081543, "rewards_train/margins_1": 0.4092371463775635, "rewards_train/margins_2": 0.42838555574417114, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -152.23681640625, "logps_train/policy_1_l": -231.50732421875, "logps_train/policy_1_w": -152.412353515625, "logps_train/policy_2_2": -120.0291519165039, "logps_train/policy_2_w": -189.7938995361328, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.6263178586959839, "rewards_train/1-l": -3.3757317066192627, "rewards_train/1-w": 2.3509531021118164, "rewards_train/2-2": 1.9978666305541992, "rewards_train/2-w": 1.5049843788146973, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.726684808731079, "rewards_train/margins_1": 0.7246352434158325, "rewards_train/margins_2": 0.49288225173950195, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -245.20953369140625, "logps_train/policy_1_l": -225.34170532226562, "logps_train/policy_1_w": -113.80609130859375, "logps_train/policy_2_2": -215.56741333007812, "logps_train/policy_2_w": -133.91464233398438, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.1657657623291016, "rewards_train/1-l": -2.5925679206848145, "rewards_train/1-w": 1.985504150390625, "rewards_train/2-2": 2.6971654891967773, "rewards_train/2-w": 1.4897860288619995, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.5780720710754395, "rewards_train/margins_1": -0.18026161193847656, "rewards_train/margins_2": 1.2073794603347778, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -247.80711364746094, "logps_train/policy_1_l": -265.40106201171875, "logps_train/policy_1_w": -121.6029052734375, "logps_train/policy_2_2": -214.71261596679688, "logps_train/policy_2_w": -140.5528564453125, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 2.059913396835327, "rewards_train/1-l": -2.337371826171875, "rewards_train/1-w": 1.735021948814392, "rewards_train/2-2": 2.9037387371063232, "rewards_train/2-w": 1.4244019985198975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.072393774986267, "rewards_train/margins_1": -0.32489144802093506, "rewards_train/margins_2": 1.4793367385864258, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -128.32501220703125, "logps_train/policy_1_l": -118.51765441894531, "logps_train/policy_1_w": -107.66929626464844, "logps_train/policy_2_2": -112.06986999511719, "logps_train/policy_2_w": -119.69599914550781, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.4206242561340332, "rewards_train/1-l": -1.5880939960479736, "rewards_train/1-w": 1.8358054161071777, "rewards_train/2-2": 1.7031691074371338, "rewards_train/2-w": 1.6913373470306396, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4238994121551514, "rewards_train/margins_1": 0.41518115997314453, "rewards_train/margins_2": 0.01183176040649414, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -119.48797607421875, "logps_train/policy_1_l": -172.04696655273438, "logps_train/policy_1_w": -115.28468322753906, "logps_train/policy_2_2": -99.70252990722656, "logps_train/policy_2_w": -138.99420166015625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.197295904159546, "rewards_train/1-l": -1.8388032913208008, "rewards_train/1-w": 1.9184074401855469, "rewards_train/2-2": 1.6289653778076172, "rewards_train/2-w": 1.3216739892959595, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7572107315063477, "rewards_train/margins_1": 0.721111536026001, "rewards_train/margins_2": 0.3072913885116577, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -127.65177154541016, "logps_train/policy_1_l": -99.03775024414062, "logps_train/policy_1_w": -89.75871276855469, "logps_train/policy_2_2": -107.13059997558594, "logps_train/policy_2_w": -107.68428039550781, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.3379483222961426, "rewards_train/1-l": -1.3154938220977783, "rewards_train/1-w": 1.7694416046142578, "rewards_train/2-2": 1.7150647640228271, "rewards_train/2-w": 1.3503217697143555, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.084935426712036, "rewards_train/margins_1": 0.43149328231811523, "rewards_train/margins_2": 0.3647429943084717, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -234.84889221191406, "logps_train/policy_1_l": -220.2353973388672, "logps_train/policy_1_w": -197.27865600585938, "logps_train/policy_2_2": -195.81735229492188, "logps_train/policy_2_w": -258.09814453125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 1.1908916234970093, "rewards_train/1-l": -2.5352578163146973, "rewards_train/1-w": 3.895571231842041, "rewards_train/2-2": 2.2221710681915283, "rewards_train/2-w": 1.9167468547821045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.430829048156738, "rewards_train/margins_1": 2.7046796083450317, "rewards_train/margins_2": 0.30542421340942383, "step": 192 }, { "epoch": 0.58, "logps_train/policy_1_2": -236.3807373046875, "logps_train/policy_1_l": -235.46185302734375, "logps_train/policy_1_w": -228.2000732421875, "logps_train/policy_2_2": -206.3343048095703, "logps_train/policy_2_w": -267.6832580566406, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 1.5213024616241455, "rewards_train/1-l": -2.1586859226226807, "rewards_train/1-w": 2.8612425327301025, "rewards_train/2-2": 1.9415693283081055, "rewards_train/2-w": 1.5941754579544067, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.019928455352783, "rewards_train/margins_1": 1.339940071105957, "rewards_train/margins_2": 0.34739387035369873, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -148.29827880859375, "logps_train/policy_1_l": -132.67709350585938, "logps_train/policy_1_w": -97.06773376464844, "logps_train/policy_2_2": -129.17498779296875, "logps_train/policy_2_w": -122.00201416015625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.3717336654663086, "rewards_train/1-l": -0.8505223393440247, "rewards_train/1-w": 1.2026015520095825, "rewards_train/2-2": 1.7700004577636719, "rewards_train/2-w": 0.5044858455657959, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.053123891353607, "rewards_train/margins_1": -0.16913211345672607, "rewards_train/margins_2": 1.265514612197876, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -224.4414825439453, "logps_train/policy_1_l": -155.63636779785156, "logps_train/policy_1_w": -179.50198364257812, "logps_train/policy_2_2": -187.84405517578125, "logps_train/policy_2_w": -224.80308532714844, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.071476697921753, "rewards_train/1-l": -1.3323874473571777, "rewards_train/1-w": 1.96503746509552, "rewards_train/2-2": 2.0812196731567383, "rewards_train/2-w": 0.9798469543457031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2974249124526978, "rewards_train/margins_1": 0.8935607671737671, "rewards_train/margins_2": 1.1013727188110352, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -205.23513793945312, "logps_train/policy_1_l": -251.031005859375, "logps_train/policy_1_w": -136.20184326171875, "logps_train/policy_2_2": -171.8706512451172, "logps_train/policy_2_w": -171.1850128173828, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.1983625888824463, "rewards_train/1-l": -2.7593514919281006, "rewards_train/1-w": 2.1923162937164307, "rewards_train/2-2": 2.520746946334839, "rewards_train/2-w": 1.647123098373413, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.951667785644531, "rewards_train/margins_1": -0.006046295166015625, "rewards_train/margins_2": 0.8736238479614258, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -218.90744018554688, "logps_train/policy_1_l": -258.30267333984375, "logps_train/policy_1_w": -159.48513793945312, "logps_train/policy_2_2": -165.39833068847656, "logps_train/policy_2_w": -201.16665649414062, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.475273847579956, "rewards_train/1-l": -2.912299156188965, "rewards_train/1-w": 2.39211106300354, "rewards_train/2-2": 2.8984479904174805, "rewards_train/2-w": 1.2052092552185059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.304410219192505, "rewards_train/margins_1": 0.916837215423584, "rewards_train/margins_2": 1.6932387351989746, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -141.911865234375, "logps_train/policy_1_l": -167.66012573242188, "logps_train/policy_1_w": -82.94808959960938, "logps_train/policy_2_2": -125.79144287109375, "logps_train/policy_2_w": -99.98123168945312, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.5038331747055054, "rewards_train/1-l": -2.2308552265167236, "rewards_train/1-w": 2.434877872467041, "rewards_train/2-2": 1.8914130926132202, "rewards_train/2-w": 2.0050015449523926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.665733098983765, "rewards_train/margins_1": 0.9310446977615356, "rewards_train/margins_2": -0.11358845233917236, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -108.06475830078125, "logps_train/policy_1_l": -93.36602020263672, "logps_train/policy_1_w": -111.29350280761719, "logps_train/policy_2_2": -96.07279968261719, "logps_train/policy_2_w": -141.22535705566406, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.087274193763733, "rewards_train/1-l": -1.0182433128356934, "rewards_train/1-w": 1.685492753982544, "rewards_train/2-2": 1.180342197418213, "rewards_train/2-w": 1.3243393898010254, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7037360668182373, "rewards_train/margins_1": 0.598218560218811, "rewards_train/margins_2": -0.1439971923828125, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -192.06556701660156, "logps_train/policy_1_l": -187.26156616210938, "logps_train/policy_1_w": -119.76834869384766, "logps_train/policy_2_2": -164.708251953125, "logps_train/policy_2_w": -151.1773681640625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.521568775177002, "rewards_train/1-l": -2.557405710220337, "rewards_train/1-w": 2.840353012084961, "rewards_train/2-2": 1.4346435070037842, "rewards_train/2-w": 2.132262945175171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.397758722305298, "rewards_train/margins_1": 2.318784236907959, "rewards_train/margins_2": -0.6976194381713867, "step": 193 }, { "epoch": 0.58, "learning_rate": 4.259013340731224e-06, "loss": 0.7567, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -209.96588134765625, "logps_train/policy_1_l": -163.92877197265625, "logps_train/policy_1_w": -160.13845825195312, "logps_train/policy_2_2": -174.13284301757812, "logps_train/policy_2_w": -193.816162109375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.375286340713501, "rewards_train/1-l": -1.1715881824493408, "rewards_train/1-w": 2.1637918949127197, "rewards_train/2-2": 2.2773420810699463, "rewards_train/2-w": 1.4847898483276367, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3353800773620605, "rewards_train/margins_1": 0.7885055541992188, "rewards_train/margins_2": 0.7925522327423096, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -109.66578674316406, "logps_train/policy_1_l": -94.70565795898438, "logps_train/policy_1_w": -43.69097900390625, "logps_train/policy_2_2": -88.530517578125, "logps_train/policy_2_w": -54.46072006225586, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -68.5, "rewards_train/1-2": 0.6447492837905884, "rewards_train/1-l": -1.5862882137298584, "rewards_train/1-w": 1.5517032146453857, "rewards_train/2-2": 1.087182641029358, "rewards_train/2-w": 1.4113496541976929, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.137991428375244, "rewards_train/margins_1": 0.9069539308547974, "rewards_train/margins_2": -0.32416701316833496, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -98.05009460449219, "logps_train/policy_1_l": -186.40582275390625, "logps_train/policy_1_w": -117.08634185791016, "logps_train/policy_2_2": -81.90167999267578, "logps_train/policy_2_w": -136.7144775390625, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.4170608520507812, "rewards_train/1-l": -2.4579648971557617, "rewards_train/1-w": 1.5366780757904053, "rewards_train/2-2": 1.964714527130127, "rewards_train/2-w": 0.8305050134658813, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.994642972946167, "rewards_train/margins_1": 0.11961722373962402, "rewards_train/margins_2": 1.1342095136642456, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -130.6674346923828, "logps_train/policy_1_l": -172.78729248046875, "logps_train/policy_1_w": -147.7978057861328, "logps_train/policy_2_2": -97.76174926757812, "logps_train/policy_2_w": -180.78314208984375, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.3785688877105713, "rewards_train/1-l": -1.8021676540374756, "rewards_train/1-w": 2.251469135284424, "rewards_train/2-2": 1.837106466293335, "rewards_train/2-w": 1.3404364585876465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.053636789321899, "rewards_train/margins_1": 0.8729002475738525, "rewards_train/margins_2": 0.4966700077056885, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -233.19186401367188, "logps_train/policy_1_l": -216.86798095703125, "logps_train/policy_1_w": -180.7281494140625, "logps_train/policy_2_2": -197.69281005859375, "logps_train/policy_2_w": -218.28240966796875, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.5542513132095337, "rewards_train/1-l": -1.6602345705032349, "rewards_train/1-w": 2.4334356784820557, "rewards_train/2-2": 2.3682188987731934, "rewards_train/2-w": 1.593634843826294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.0936702489852905, "rewards_train/margins_1": 0.879184365272522, "rewards_train/margins_2": 0.7745840549468994, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -78.98367309570312, "logps_train/policy_1_l": -82.8538818359375, "logps_train/policy_1_w": -74.62788391113281, "logps_train/policy_2_2": -67.08502197265625, "logps_train/policy_2_w": -84.87406158447266, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.430539608001709, "rewards_train/1-l": -1.0447633266448975, "rewards_train/1-w": 1.8239309787750244, "rewards_train/2-2": 1.742279291152954, "rewards_train/2-w": 1.4274373054504395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.868694305419922, "rewards_train/margins_1": 0.39339137077331543, "rewards_train/margins_2": 0.31484198570251465, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -88.03053283691406, "logps_train/policy_1_l": -88.35051727294922, "logps_train/policy_1_w": -80.84061431884766, "logps_train/policy_2_2": -72.40296936035156, "logps_train/policy_2_w": -93.35975646972656, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 0.7594462633132935, "rewards_train/1-l": -1.2926691770553589, "rewards_train/1-w": 1.4423061609268188, "rewards_train/2-2": 1.2417347431182861, "rewards_train/2-w": 1.0577739477157593, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7349753379821777, "rewards_train/margins_1": 0.6828598976135254, "rewards_train/margins_2": 0.18396079540252686, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -90.67401885986328, "logps_train/policy_1_l": -53.72295379638672, "logps_train/policy_1_w": -112.8484115600586, "logps_train/policy_2_2": -67.97476196289062, "logps_train/policy_2_w": -149.82626342773438, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -46.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.7763481140136719, "rewards_train/1-l": -0.7235157489776611, "rewards_train/1-w": 1.7432835102081299, "rewards_train/2-2": 1.34749436378479, "rewards_train/2-w": 1.0595605373382568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.466799259185791, "rewards_train/margins_1": 0.966935396194458, "rewards_train/margins_2": 0.2879338264465332, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -160.31968688964844, "logps_train/policy_1_l": -158.5177001953125, "logps_train/policy_1_w": -204.39410400390625, "logps_train/policy_2_2": -127.67631530761719, "logps_train/policy_2_w": -245.0286407470703, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 0.8031875491142273, "rewards_train/1-l": -0.8371222019195557, "rewards_train/1-w": 1.9605894088745117, "rewards_train/2-2": 1.6495558023452759, "rewards_train/2-w": 0.6471370458602905, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.7977116107940674, "rewards_train/margins_1": 1.1574018597602844, "rewards_train/margins_2": 1.0024187564849854, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -72.0543441772461, "logps_train/policy_1_l": -57.22053909301758, "logps_train/policy_1_w": -53.7349853515625, "logps_train/policy_2_2": -63.505645751953125, "logps_train/policy_2_w": -65.47242736816406, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -59.5, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.27815932035446167, "rewards_train/1-l": -0.47991514205932617, "rewards_train/1-w": 0.5647337436676025, "rewards_train/2-2": 0.3716033697128296, "rewards_train/2-w": 0.4261942505836487, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 1.0446488857269287, "rewards_train/margins_1": 0.28657442331314087, "rewards_train/margins_2": -0.05459088087081909, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -68.52544403076172, "logps_train/policy_1_l": -100.19401550292969, "logps_train/policy_1_w": -71.737060546875, "logps_train/policy_2_2": -55.06822204589844, "logps_train/policy_2_w": -96.61376953125, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 1.1622991561889648, "rewards_train/1-l": -1.1799489259719849, "rewards_train/1-w": 0.8505128622055054, "rewards_train/2-2": 1.500990390777588, "rewards_train/2-w": -0.1168459951877594, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.0304617881774902, "rewards_train/margins_1": -0.3117862939834595, "rewards_train/margins_2": 1.6178363859653473, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -139.73159790039062, "logps_train/policy_1_l": -112.31746673583984, "logps_train/policy_1_w": -68.20533752441406, "logps_train/policy_2_2": -105.83995819091797, "logps_train/policy_2_w": -92.94403839111328, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.1588717699050903, "rewards_train/1-l": -1.0848722457885742, "rewards_train/1-w": 1.5337626934051514, "rewards_train/2-2": 2.0706920623779297, "rewards_train/2-w": 0.7727839946746826, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6186349391937256, "rewards_train/margins_1": 0.37489092350006104, "rewards_train/margins_2": 1.297908067703247, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -140.2726287841797, "logps_train/policy_1_l": -122.45555114746094, "logps_train/policy_1_w": -131.709716796875, "logps_train/policy_2_2": -115.52481842041016, "logps_train/policy_2_w": -156.79322814941406, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.7914865016937256, "rewards_train/1-l": -1.2462143898010254, "rewards_train/1-w": 2.35793399810791, "rewards_train/2-2": 1.4631431102752686, "rewards_train/2-w": 1.7363020181655884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6041483879089355, "rewards_train/margins_1": 1.5664474964141846, "rewards_train/margins_2": -0.2731589078903198, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -154.6895751953125, "logps_train/policy_1_l": -144.7081756591797, "logps_train/policy_1_w": -122.89103698730469, "logps_train/policy_2_2": -123.79377746582031, "logps_train/policy_2_w": -161.23141479492188, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.0005736351013184, "rewards_train/1-l": -2.0380048751831055, "rewards_train/1-w": 2.3585524559020996, "rewards_train/2-2": 1.9557793140411377, "rewards_train/2-w": 1.1360375881195068, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.396557331085205, "rewards_train/margins_1": 1.3579788208007812, "rewards_train/margins_2": 0.8197417259216309, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -231.65196228027344, "logps_train/policy_1_l": -137.9664764404297, "logps_train/policy_1_w": -103.78199768066406, "logps_train/policy_2_2": -193.5348358154297, "logps_train/policy_2_w": -122.88408660888672, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.3379278182983398, "rewards_train/1-l": -0.38121849298477173, "rewards_train/1-w": 1.468285322189331, "rewards_train/2-2": 2.493391513824463, "rewards_train/2-w": 1.129560112953186, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8495038151741028, "rewards_train/margins_1": 0.1303575038909912, "rewards_train/margins_2": 1.3638314008712769, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -189.5296630859375, "logps_train/policy_1_l": -229.90252685546875, "logps_train/policy_1_w": -204.5301513671875, "logps_train/policy_2_2": -153.68728637695312, "logps_train/policy_2_w": -274.0107116699219, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -294.0, "rewards_train/1-2": 2.1517202854156494, "rewards_train/1-l": -2.191913366317749, "rewards_train/1-w": 3.492298126220703, "rewards_train/2-2": 2.8859596252441406, "rewards_train/2-w": 2.017679214477539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.684211492538452, "rewards_train/margins_1": 1.3405778408050537, "rewards_train/margins_2": 0.8682804107666016, "step": 195 }, { "epoch": 0.59, "learning_rate": 4.241377572657493e-06, "loss": 0.9278, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -128.07911682128906, "logps_train/policy_1_l": -79.93966674804688, "logps_train/policy_1_w": -78.62165069580078, "logps_train/policy_2_2": -101.09249114990234, "logps_train/policy_2_w": -111.54818725585938, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.5131819248199463, "rewards_train/1-l": -0.49416255950927734, "rewards_train/1-w": 2.31596040725708, "rewards_train/2-2": 1.5934849977493286, "rewards_train/2-w": 1.5483067035675049, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8101229667663574, "rewards_train/margins_1": 1.8027784824371338, "rewards_train/margins_2": 0.04517829418182373, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -110.4749984741211, "logps_train/policy_1_l": -89.08642578125, "logps_train/policy_1_w": -110.7607192993164, "logps_train/policy_2_2": -96.95501708984375, "logps_train/policy_2_w": -128.36973571777344, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.6243746280670166, "rewards_train/1-l": -0.7602057456970215, "rewards_train/1-w": 1.262600064277649, "rewards_train/2-2": 1.9392646551132202, "rewards_train/2-w": 0.7536512613296509, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.0228058099746704, "rewards_train/margins_1": -0.3617745637893677, "rewards_train/margins_2": 1.1856133937835693, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -69.36515808105469, "logps_train/policy_1_l": -76.82221221923828, "logps_train/policy_1_w": -44.5165901184082, "logps_train/policy_2_2": -58.675682067871094, "logps_train/policy_2_w": -60.307411193847656, "logps_train/ref_1_2": -73.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -56.0, "logps_train/ref_2_2": -63.75, "logps_train/ref_2_w": -68.5, "rewards_train/1-2": 0.3790118098258972, "rewards_train/1-l": -0.8789984583854675, "rewards_train/1-w": 1.1348645687103271, "rewards_train/2-2": 0.50108402967453, "rewards_train/2-w": 0.8169146776199341, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.0138630270957947, "rewards_train/margins_1": 0.7558527588844299, "rewards_train/margins_2": -0.31583064794540405, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -156.4696044921875, "logps_train/policy_1_l": -256.07025146484375, "logps_train/policy_1_w": -123.87783813476562, "logps_train/policy_2_2": -129.26779174804688, "logps_train/policy_2_w": -167.37652587890625, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.4577264785766602, "rewards_train/1-l": -2.684565544128418, "rewards_train/1-w": 2.1981537342071533, "rewards_train/2-2": 2.1755642890930176, "rewards_train/2-w": 0.9326601624488831, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.882719278335571, "rewards_train/margins_1": 0.7404272556304932, "rewards_train/margins_2": 1.2429041266441345, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -182.41246032714844, "logps_train/policy_1_l": -185.83010864257812, "logps_train/policy_1_w": -139.94424438476562, "logps_train/policy_2_2": -156.978271484375, "logps_train/policy_2_w": -176.32232666015625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.165784478187561, "rewards_train/1-l": -2.020608901977539, "rewards_train/1-w": 1.9497166872024536, "rewards_train/2-2": 2.0311760902404785, "rewards_train/2-w": 1.085052490234375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9703255891799927, "rewards_train/margins_1": 0.7839322090148926, "rewards_train/margins_2": 0.9461236000061035, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -241.08465576171875, "logps_train/policy_1_l": -191.1168975830078, "logps_train/policy_1_w": -132.78057861328125, "logps_train/policy_2_2": -201.56362915039062, "logps_train/policy_2_w": -174.29148864746094, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.525907516479492, "rewards_train/1-l": -1.675386905670166, "rewards_train/1-w": 3.0875678062438965, "rewards_train/2-2": 3.6498868465423584, "rewards_train/2-w": 1.9302269220352173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7629547119140625, "rewards_train/margins_1": 0.5616602897644043, "rewards_train/margins_2": 1.7196599245071411, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -122.89334869384766, "logps_train/policy_1_l": -126.04377746582031, "logps_train/policy_1_w": -95.64788818359375, "logps_train/policy_2_2": -101.32380676269531, "logps_train/policy_2_w": -115.70817565917969, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.1505087614059448, "rewards_train/1-l": -1.3194644451141357, "rewards_train/1-w": 1.896296739578247, "rewards_train/2-2": 1.4452558755874634, "rewards_train/2-w": 1.1506664752960205, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.215761184692383, "rewards_train/margins_1": 0.7457879781723022, "rewards_train/margins_2": 0.29458940029144287, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -114.39733123779297, "logps_train/policy_1_l": -114.94873046875, "logps_train/policy_1_w": -131.61354064941406, "logps_train/policy_2_2": -95.98674011230469, "logps_train/policy_2_w": -153.34066772460938, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.3079231977462769, "rewards_train/1-l": -0.643311083316803, "rewards_train/1-w": 2.2807352542877197, "rewards_train/2-2": 1.5298421382904053, "rewards_train/2-w": 1.5747225284576416, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9240463376045227, "rewards_train/margins_1": 0.9728120565414429, "rewards_train/margins_2": -0.04488039016723633, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -188.72964477539062, "logps_train/policy_1_l": -180.42063903808594, "logps_train/policy_1_w": -205.86044311523438, "logps_train/policy_2_2": -154.35525512695312, "logps_train/policy_2_w": -247.97686767578125, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -237.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.4317235946655273, "rewards_train/1-l": -1.436594843864441, "rewards_train/1-w": 3.113955020904541, "rewards_train/2-2": 2.1754119396209717, "rewards_train/2-w": 2.175751209259033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.550549864768982, "rewards_train/margins_1": 1.6822314262390137, "rewards_train/margins_2": -0.00033926963806152344, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -138.98953247070312, "logps_train/policy_1_l": -187.97354125976562, "logps_train/policy_1_w": -121.16749572753906, "logps_train/policy_2_2": -119.91016387939453, "logps_train/policy_2_w": -149.61624145507812, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.9541726112365723, "rewards_train/1-l": -1.5952051877975464, "rewards_train/1-w": 1.2207505702972412, "rewards_train/2-2": 2.3808579444885254, "rewards_train/2-w": 0.46493852138519287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.8159557580947876, "rewards_train/margins_1": -0.733422040939331, "rewards_train/margins_2": 1.9159194231033325, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -179.388427734375, "logps_train/policy_1_l": -172.5175323486328, "logps_train/policy_1_w": -160.98794555664062, "logps_train/policy_2_2": -145.3031005859375, "logps_train/policy_2_w": -200.74789428710938, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.4830312728881836, "rewards_train/1-l": -0.9164503216743469, "rewards_train/1-w": 3.1363630294799805, "rewards_train/2-2": 2.576721429824829, "rewards_train/2-w": 2.3314599990844727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.052813351154327, "rewards_train/margins_1": 1.6533317565917969, "rewards_train/margins_2": 0.24526143074035645, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -212.73486328125, "logps_train/policy_1_l": -192.03732299804688, "logps_train/policy_1_w": -152.30609130859375, "logps_train/policy_2_2": -163.87315368652344, "logps_train/policy_2_w": -194.8885040283203, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.7562026977539062, "rewards_train/1-l": -2.4021713733673096, "rewards_train/1-w": 2.3787662982940674, "rewards_train/2-2": 3.003309726715088, "rewards_train/2-w": 1.4017748832702637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.780937671661377, "rewards_train/margins_1": 0.6225636005401611, "rewards_train/margins_2": 1.6015348434448242, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -109.76929473876953, "logps_train/policy_1_l": -99.84766387939453, "logps_train/policy_1_w": -76.6890869140625, "logps_train/policy_2_2": -92.08712768554688, "logps_train/policy_2_w": -98.3399887084961, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.2956594228744507, "rewards_train/1-l": -1.5539741516113281, "rewards_train/1-w": 1.5982788801193237, "rewards_train/2-2": 1.4406099319458008, "rewards_train/2-w": 1.2613139152526855, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.152253031730652, "rewards_train/margins_1": 0.30261945724487305, "rewards_train/margins_2": 0.17929601669311523, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -186.9527587890625, "logps_train/policy_1_l": -172.39498901367188, "logps_train/policy_1_w": -122.59662628173828, "logps_train/policy_2_2": -147.4935302734375, "logps_train/policy_2_w": -150.1635284423828, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.1859731674194336, "rewards_train/1-l": -2.1004369258880615, "rewards_train/1-w": 1.9622125625610352, "rewards_train/2-2": 3.3943958282470703, "rewards_train/2-w": 1.543022871017456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.062649488449097, "rewards_train/margins_1": -0.22376060485839844, "rewards_train/margins_2": 1.8513729572296143, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -231.07188415527344, "logps_train/policy_1_l": -171.87356567382812, "logps_train/policy_1_w": -133.73052978515625, "logps_train/policy_2_2": -195.8994598388672, "logps_train/policy_2_w": -159.86099243164062, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 0.614686906337738, "rewards_train/1-l": -2.259232521057129, "rewards_train/1-w": 1.723432183265686, "rewards_train/2-2": 1.6202096939086914, "rewards_train/2-w": 1.1056972742080688, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.982664704322815, "rewards_train/margins_1": 1.108745276927948, "rewards_train/margins_2": 0.5145124197006226, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -127.5047378540039, "logps_train/policy_1_l": -193.6539306640625, "logps_train/policy_1_w": -109.88529968261719, "logps_train/policy_2_2": -104.51216125488281, "logps_train/policy_2_w": -142.82772827148438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.2807756662368774, "rewards_train/1-l": -2.686047077178955, "rewards_train/1-w": 2.5645947456359863, "rewards_train/2-2": 1.9065954685211182, "rewards_train/2-w": 1.8359771966934204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.250641822814941, "rewards_train/margins_1": 1.2838190793991089, "rewards_train/margins_2": 0.07061827182769775, "step": 197 }, { "epoch": 0.59, "learning_rate": 4.22357184920253e-06, "loss": 0.7788, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -147.40576171875, "logps_train/policy_1_l": -186.59478759765625, "logps_train/policy_1_w": -130.30764770507812, "logps_train/policy_2_2": -124.21472930908203, "logps_train/policy_2_w": -166.171142578125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.2117693424224854, "rewards_train/1-l": -2.406402587890625, "rewards_train/1-w": 2.934469223022461, "rewards_train/2-2": 2.5757925510406494, "rewards_train/2-w": 1.8594486713409424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.340871810913086, "rewards_train/margins_1": 0.7226998805999756, "rewards_train/margins_2": 0.716343879699707, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -127.10148620605469, "logps_train/policy_1_l": -213.68545532226562, "logps_train/policy_1_w": -90.88688659667969, "logps_train/policy_2_2": -101.47262573242188, "logps_train/policy_2_w": -119.71170043945312, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 0.7597731351852417, "rewards_train/1-l": -2.0763583183288574, "rewards_train/1-w": 0.8659989833831787, "rewards_train/2-2": 1.177737832069397, "rewards_train/2-w": 0.5725806951522827, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.942357301712036, "rewards_train/margins_1": 0.10622584819793701, "rewards_train/margins_2": 0.6051571369171143, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -186.70533752441406, "logps_train/policy_1_l": -202.46633911132812, "logps_train/policy_1_w": -111.07685089111328, "logps_train/policy_2_2": -148.53805541992188, "logps_train/policy_2_w": -147.95925903320312, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.8294659852981567, "rewards_train/1-l": -3.195072650909424, "rewards_train/1-w": 1.6110649108886719, "rewards_train/2-2": 2.5836942195892334, "rewards_train/2-w": 1.105635643005371, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.806137561798096, "rewards_train/margins_1": -0.21840107440948486, "rewards_train/margins_2": 1.4780585765838623, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -182.066162109375, "logps_train/policy_1_l": -217.38546752929688, "logps_train/policy_1_w": -133.4405975341797, "logps_train/policy_2_2": -141.94332885742188, "logps_train/policy_2_w": -172.55108642578125, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.2121329307556152, "rewards_train/1-l": -1.7049529552459717, "rewards_train/1-w": 1.855940818786621, "rewards_train/2-2": 1.9744168519973755, "rewards_train/2-w": 1.0511419773101807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5608937740325928, "rewards_train/margins_1": 0.6438078880310059, "rewards_train/margins_2": 0.9232748746871948, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -165.60348510742188, "logps_train/policy_1_l": -197.25314331054688, "logps_train/policy_1_w": -101.74195861816406, "logps_train/policy_2_2": -134.206787109375, "logps_train/policy_2_w": -126.51333618164062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.2630882263183594, "rewards_train/1-l": -2.099141836166382, "rewards_train/1-w": 2.2125232219696045, "rewards_train/2-2": 1.8699465990066528, "rewards_train/2-w": 1.4724947214126587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.311665058135986, "rewards_train/margins_1": 0.9494349956512451, "rewards_train/margins_2": 0.39745187759399414, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -137.9447784423828, "logps_train/policy_1_l": -125.8996353149414, "logps_train/policy_1_w": -115.21000671386719, "logps_train/policy_2_2": -115.3191909790039, "logps_train/policy_2_w": -146.096923828125, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.4289593696594238, "rewards_train/1-l": -1.5247294902801514, "rewards_train/1-w": 2.0431604385375977, "rewards_train/2-2": 1.9409327507019043, "rewards_train/2-w": 1.2727298736572266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.567889928817749, "rewards_train/margins_1": 0.6142010688781738, "rewards_train/margins_2": 0.6682028770446777, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -178.10394287109375, "logps_train/policy_1_l": -244.67330932617188, "logps_train/policy_1_w": -110.37975311279297, "logps_train/policy_2_2": -144.2128143310547, "logps_train/policy_2_w": -142.58316040039062, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.965728223323822, "rewards_train/1-l": -1.3886699676513672, "rewards_train/1-w": 1.8577280044555664, "rewards_train/2-2": 1.7904138565063477, "rewards_train/2-w": 1.4323097467422485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2463979721069336, "rewards_train/margins_1": 0.8919997811317444, "rewards_train/margins_2": 0.3581041097640991, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -175.4611053466797, "logps_train/policy_1_l": -148.31488037109375, "logps_train/policy_1_w": -130.22781372070312, "logps_train/policy_2_2": -142.6067657470703, "logps_train/policy_2_w": -164.14527893066406, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.9146801829338074, "rewards_train/1-l": -1.8472599983215332, "rewards_train/1-w": 2.747530937194824, "rewards_train/2-2": 1.785442590713501, "rewards_train/2-w": 1.9475817680358887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.594790935516357, "rewards_train/margins_1": 1.8328507542610168, "rewards_train/margins_2": -0.1621391773223877, "step": 198 }, { "epoch": 0.6, "logps_train/policy_1_2": -101.85449981689453, "logps_train/policy_1_l": -95.72270965576172, "logps_train/policy_1_w": -97.6214599609375, "logps_train/policy_2_2": -86.51709747314453, "logps_train/policy_2_w": -113.22503662109375, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 0.7680651545524597, "rewards_train/1-l": -0.6890430450439453, "rewards_train/1-w": 1.3179324865341187, "rewards_train/2-2": 1.1139156818389893, "rewards_train/2-w": 0.8985893726348877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.006975531578064, "rewards_train/margins_1": 0.5498673319816589, "rewards_train/margins_2": 0.21532630920410156, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -115.98212432861328, "logps_train/policy_1_l": -168.23626708984375, "logps_train/policy_1_w": -128.27439880371094, "logps_train/policy_2_2": -98.75790405273438, "logps_train/policy_2_w": -156.43234252929688, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.8361631035804749, "rewards_train/1-l": -1.6648385524749756, "rewards_train/1-w": 1.9885754585266113, "rewards_train/2-2": 1.0386629104614258, "rewards_train/2-w": 1.241628885269165, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.653414011001587, "rewards_train/margins_1": 1.1524123549461365, "rewards_train/margins_2": -0.20296597480773926, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -164.27911376953125, "logps_train/policy_1_l": -200.92205810546875, "logps_train/policy_1_w": -110.03495788574219, "logps_train/policy_2_2": -137.4487762451172, "logps_train/policy_2_w": -142.09857177734375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.7377135753631592, "rewards_train/1-l": -1.8195374011993408, "rewards_train/1-w": 1.976191520690918, "rewards_train/2-2": 2.573286294937134, "rewards_train/2-w": 1.6432682275772095, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.795728921890259, "rewards_train/margins_1": 0.2384779453277588, "rewards_train/margins_2": 0.9300180673599243, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -126.42041778564453, "logps_train/policy_1_l": -141.87030029296875, "logps_train/policy_1_w": -133.19590759277344, "logps_train/policy_2_2": -107.93753814697266, "logps_train/policy_2_w": -160.69976806640625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5708487033843994, "rewards_train/1-l": -1.2025095224380493, "rewards_train/1-w": 2.4491593837738037, "rewards_train/2-2": 1.9914023876190186, "rewards_train/2-w": 2.1003353595733643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.651668906211853, "rewards_train/margins_1": 0.8783106803894043, "rewards_train/margins_2": -0.1089329719543457, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -161.15821838378906, "logps_train/policy_1_l": -108.1473388671875, "logps_train/policy_1_w": -121.40137481689453, "logps_train/policy_2_2": -133.29953002929688, "logps_train/policy_2_w": -152.17755126953125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.8576158285140991, "rewards_train/1-l": -0.9084839224815369, "rewards_train/1-w": 2.183300018310547, "rewards_train/2-2": 2.69504714012146, "rewards_train/2-w": 1.3189620971679688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0917839407920837, "rewards_train/margins_1": 0.32568418979644775, "rewards_train/margins_2": 1.3760850429534912, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -218.0995635986328, "logps_train/policy_1_l": -275.1395263671875, "logps_train/policy_1_w": -214.32168579101562, "logps_train/policy_2_2": -192.29873657226562, "logps_train/policy_2_w": -257.8531494140625, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.540043354034424, "rewards_train/1-l": -2.615516424179077, "rewards_train/1-w": 2.8498635292053223, "rewards_train/2-2": 2.863877296447754, "rewards_train/2-w": 1.6537483930587769, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.465379953384399, "rewards_train/margins_1": 0.30982017517089844, "rewards_train/margins_2": 1.210128903388977, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -164.65118408203125, "logps_train/policy_1_l": -137.1498565673828, "logps_train/policy_1_w": -172.56201171875, "logps_train/policy_2_2": -131.32373046875, "logps_train/policy_2_w": -221.8072509765625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.0130062103271484, "rewards_train/1-l": -0.10873532295227051, "rewards_train/1-w": 2.499659299850464, "rewards_train/2-2": 1.3857910633087158, "rewards_train/2-w": 1.650524377822876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6083946228027344, "rewards_train/margins_1": 1.4866530895233154, "rewards_train/margins_2": -0.26473331451416016, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -119.40640258789062, "logps_train/policy_1_l": -234.25555419921875, "logps_train/policy_1_w": -171.31982421875, "logps_train/policy_2_2": -97.54715728759766, "logps_train/policy_2_w": -203.17300415039062, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.5234224796295166, "rewards_train/1-l": -2.0882506370544434, "rewards_train/1-w": 3.038719654083252, "rewards_train/2-2": 1.693721890449524, "rewards_train/2-w": 2.12410569190979, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.126970291137695, "rewards_train/margins_1": 1.5152971744537354, "rewards_train/margins_2": -0.4303838014602661, "step": 199 }, { "epoch": 0.6, "learning_rate": 4.205597908173555e-06, "loss": 0.8019, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -200.93870544433594, "logps_train/policy_1_l": -256.3165588378906, "logps_train/policy_1_w": -193.110595703125, "logps_train/policy_2_2": -167.30845642089844, "logps_train/policy_2_w": -234.01502990722656, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.6256604194641113, "rewards_train/1-l": -2.6050920486450195, "rewards_train/1-w": 2.4842536449432373, "rewards_train/2-2": 2.5211081504821777, "rewards_train/2-w": 1.8016220331192017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.089345693588257, "rewards_train/margins_1": 0.858593225479126, "rewards_train/margins_2": 0.7194861173629761, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -231.1605224609375, "logps_train/policy_1_l": -231.30238342285156, "logps_train/policy_1_w": -218.96844482421875, "logps_train/policy_2_2": -194.58279418945312, "logps_train/policy_2_w": -251.04685974121094, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 2.297229290008545, "rewards_train/1-l": -1.9323872327804565, "rewards_train/1-w": 3.254328727722168, "rewards_train/2-2": 3.3503143787384033, "rewards_train/2-w": 2.255859851837158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1867159605026245, "rewards_train/margins_1": 0.957099437713623, "rewards_train/margins_2": 1.0944545269012451, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -160.53805541992188, "logps_train/policy_1_l": -211.9801788330078, "logps_train/policy_1_w": -145.3148956298828, "logps_train/policy_2_2": -132.8001251220703, "logps_train/policy_2_w": -181.85133361816406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.8055686950683594, "rewards_train/1-l": -1.741767168045044, "rewards_train/1-w": 2.0060107707977295, "rewards_train/2-2": 2.5699877738952637, "rewards_train/2-w": 1.1461169719696045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7477779388427734, "rewards_train/margins_1": 0.20044207572937012, "rewards_train/margins_2": 1.4238708019256592, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -107.77500915527344, "logps_train/policy_1_l": -137.37295532226562, "logps_train/policy_1_w": -75.66883850097656, "logps_train/policy_2_2": -90.425048828125, "logps_train/policy_2_w": -95.20341491699219, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 1.4232800006866455, "rewards_train/1-l": -1.6318265199661255, "rewards_train/1-w": 1.2116316556930542, "rewards_train/2-2": 1.635619878768921, "rewards_train/2-w": 0.9570023417472839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8434581756591797, "rewards_train/margins_1": -0.2116483449935913, "rewards_train/margins_2": 0.678617537021637, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -250.76580810546875, "logps_train/policy_1_l": -310.4447937011719, "logps_train/policy_1_w": -183.4046173095703, "logps_train/policy_2_2": -209.55130004882812, "logps_train/policy_2_w": -222.21481323242188, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -278.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 1.698419213294983, "rewards_train/1-l": -3.228855609893799, "rewards_train/1-w": 2.4845380783081055, "rewards_train/2-2": 2.9151835441589355, "rewards_train/2-w": 1.8347700834274292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.713393688201904, "rewards_train/margins_1": 0.7861188650131226, "rewards_train/margins_2": 1.0804134607315063, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -118.02804565429688, "logps_train/policy_1_l": -103.2177734375, "logps_train/policy_1_w": -126.22564697265625, "logps_train/policy_2_2": -88.69499206542969, "logps_train/policy_2_w": -147.1729736328125, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.1628210544586182, "rewards_train/1-l": -0.9250982403755188, "rewards_train/1-w": 1.7454047203063965, "rewards_train/2-2": 1.9777660369873047, "rewards_train/2-w": 1.2545764446258545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6705029606819153, "rewards_train/margins_1": 0.5825836658477783, "rewards_train/margins_2": 0.7231895923614502, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -141.64773559570312, "logps_train/policy_1_l": -134.6759033203125, "logps_train/policy_1_w": -174.68585205078125, "logps_train/policy_2_2": -125.97828674316406, "logps_train/policy_2_w": -200.04147338867188, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.4665732383728027, "rewards_train/1-l": -1.251771092414856, "rewards_train/1-w": 1.943523645401001, "rewards_train/2-2": 1.8184804916381836, "rewards_train/2-w": 0.9556189775466919, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.195294737815857, "rewards_train/margins_1": 0.47695040702819824, "rewards_train/margins_2": 0.8628615140914917, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -175.75111389160156, "logps_train/policy_1_l": -148.1392364501953, "logps_train/policy_1_w": -175.9793701171875, "logps_train/policy_2_2": -153.14598083496094, "logps_train/policy_2_w": -205.66253662109375, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.2389509677886963, "rewards_train/1-l": -0.8311105966567993, "rewards_train/1-w": 2.722374677658081, "rewards_train/2-2": 2.0158708095550537, "rewards_train/2-w": 1.6446833610534668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5534852743148804, "rewards_train/margins_1": 1.4834237098693848, "rewards_train/margins_2": 0.3711874485015869, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -113.4169921875, "logps_train/policy_1_l": -88.67616271972656, "logps_train/policy_1_w": -61.04591751098633, "logps_train/policy_2_2": -93.28731536865234, "logps_train/policy_2_w": -79.099609375, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 0.9918942451477051, "rewards_train/1-l": -0.5564833879470825, "rewards_train/1-w": 0.8046857118606567, "rewards_train/2-2": 1.448611855506897, "rewards_train/2-w": 0.5212892293930054, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.3611690998077393, "rewards_train/margins_1": -0.18720853328704834, "rewards_train/margins_2": 0.9273226261138916, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -114.87970733642578, "logps_train/policy_1_l": -75.35023498535156, "logps_train/policy_1_w": -138.07461547851562, "logps_train/policy_2_2": -102.35804748535156, "logps_train/policy_2_w": -152.17678833007812, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.9233568906784058, "rewards_train/1-l": -0.8716443777084351, "rewards_train/1-w": 2.3074793815612793, "rewards_train/2-2": 1.3526723384857178, "rewards_train/2-w": 1.9545879364013672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1791237592697144, "rewards_train/margins_1": 1.3841224908828735, "rewards_train/margins_2": -0.6019155979156494, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -132.37176513671875, "logps_train/policy_1_l": -131.22959899902344, "logps_train/policy_1_w": -88.3436279296875, "logps_train/policy_2_2": -99.21466827392578, "logps_train/policy_2_w": -126.7664566040039, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9893864393234253, "rewards_train/1-l": -1.2878037691116333, "rewards_train/1-w": 1.6711058616638184, "rewards_train/2-2": 1.8629080057144165, "rewards_train/2-w": 1.1065572500228882, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9589096307754517, "rewards_train/margins_1": 0.6817194223403931, "rewards_train/margins_2": 0.7563507556915283, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -127.65348815917969, "logps_train/policy_1_l": -74.03390502929688, "logps_train/policy_1_w": -67.44802856445312, "logps_train/policy_2_2": -103.86457061767578, "logps_train/policy_2_w": -80.9383316040039, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -90.5, "rewards_train/1-2": 1.1455893516540527, "rewards_train/1-l": -0.9319612979888916, "rewards_train/1-w": 1.1692049503326416, "rewards_train/2-2": 2.0307304859161377, "rewards_train/2-w": 0.9688096642494202, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.101166248321533, "rewards_train/margins_1": 0.023615598678588867, "rewards_train/margins_2": 1.0619208216667175, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -107.20411682128906, "logps_train/policy_1_l": -97.36351776123047, "logps_train/policy_1_w": -109.4205551147461, "logps_train/policy_2_2": -84.3436508178711, "logps_train/policy_2_w": -144.97134399414062, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -88.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.2747056484222412, "rewards_train/1-l": -0.9624261260032654, "rewards_train/1-w": 1.9024763107299805, "rewards_train/2-2": 1.670712947845459, "rewards_train/2-w": 0.985679030418396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.864902436733246, "rewards_train/margins_1": 0.6277706623077393, "rewards_train/margins_2": 0.685033917427063, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -209.9112091064453, "logps_train/policy_1_l": -224.2176971435547, "logps_train/policy_1_w": -201.43194580078125, "logps_train/policy_2_2": -166.3046417236328, "logps_train/policy_2_w": -257.544677734375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.4756755828857422, "rewards_train/1-l": -1.7553627490997314, "rewards_train/1-w": 4.3161797523498535, "rewards_train/2-2": 2.3353569507598877, "rewards_train/2-w": 2.9392826557159424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 6.071542501449585, "rewards_train/margins_1": 2.8405041694641113, "rewards_train/margins_2": -0.6039257049560547, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -93.7997055053711, "logps_train/policy_1_l": -113.00686645507812, "logps_train/policy_1_w": -81.30389404296875, "logps_train/policy_2_2": -73.07342529296875, "logps_train/policy_2_w": -111.57470703125, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.8540134429931641, "rewards_train/1-l": -0.7319360375404358, "rewards_train/1-w": 1.5942202806472778, "rewards_train/2-2": 1.4356262683868408, "rewards_train/2-w": 0.7120609283447266, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3261563181877136, "rewards_train/margins_1": 0.7402068376541138, "rewards_train/margins_2": 0.7235653400421143, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -141.77198791503906, "logps_train/policy_1_l": -157.06890869140625, "logps_train/policy_1_w": -131.94113159179688, "logps_train/policy_2_2": -116.12652587890625, "logps_train/policy_2_w": -165.5603485107422, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.1095194816589355, "rewards_train/1-l": -1.4663450717926025, "rewards_train/1-w": 2.1340136528015137, "rewards_train/2-2": 1.7656680345535278, "rewards_train/2-w": 1.2228707075119019, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.600358724594116, "rewards_train/margins_1": 1.0244941711425781, "rewards_train/margins_2": 0.542797327041626, "step": 201 }, { "epoch": 0.6, "learning_rate": 4.187457503795526e-06, "loss": 0.8124, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -186.42730712890625, "logps_train/policy_1_l": -145.42681884765625, "logps_train/policy_1_w": -147.8937225341797, "logps_train/policy_2_2": -152.99490356445312, "logps_train/policy_2_w": -172.55508422851562, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.7322686910629272, "rewards_train/1-l": -1.0850656032562256, "rewards_train/1-w": 2.605257034301758, "rewards_train/2-2": 2.800508975982666, "rewards_train/2-w": 1.926716923713684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6903226375579834, "rewards_train/margins_1": 0.8729883432388306, "rewards_train/margins_2": 0.8737920522689819, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -100.45413208007812, "logps_train/policy_1_l": -151.43348693847656, "logps_train/policy_1_w": -97.47972106933594, "logps_train/policy_2_2": -75.79539489746094, "logps_train/policy_2_w": -129.7258758544922, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.275681495666504, "rewards_train/1-l": -1.9658092260360718, "rewards_train/1-w": 2.1324968338012695, "rewards_train/2-2": 1.6886241436004639, "rewards_train/2-w": 1.2574907541275024, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.098306059837341, "rewards_train/margins_1": 0.8568153381347656, "rewards_train/margins_2": 0.4311333894729614, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -106.59774780273438, "logps_train/policy_1_l": -91.164306640625, "logps_train/policy_1_w": -46.00004577636719, "logps_train/policy_2_2": -83.69425201416016, "logps_train/policy_2_w": -65.65568542480469, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -61.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 0.8683510422706604, "rewards_train/1-l": -0.7342037558555603, "rewards_train/1-w": 1.4910107851028442, "rewards_train/2-2": 1.668856143951416, "rewards_train/2-w": 1.3350169658660889, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2252145409584045, "rewards_train/margins_1": 0.6226597428321838, "rewards_train/margins_2": 0.33383917808532715, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -229.56631469726562, "logps_train/policy_1_l": -156.0094451904297, "logps_train/policy_1_w": -187.0040283203125, "logps_train/policy_2_2": -192.50277709960938, "logps_train/policy_2_w": -231.7692108154297, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.7058677673339844, "rewards_train/1-l": -1.310319185256958, "rewards_train/1-w": 3.4245972633361816, "rewards_train/2-2": 2.602846384048462, "rewards_train/2-w": 2.2855796813964844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.73491644859314, "rewards_train/margins_1": 1.7187294960021973, "rewards_train/margins_2": 0.31726670265197754, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -205.31228637695312, "logps_train/policy_1_l": -297.3898620605469, "logps_train/policy_1_w": -220.41635131835938, "logps_train/policy_2_2": -169.6593017578125, "logps_train/policy_2_w": -268.5189208984375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 1.8461143970489502, "rewards_train/1-l": -2.1861538887023926, "rewards_train/1-w": 2.4607086181640625, "rewards_train/2-2": 2.516491174697876, "rewards_train/2-w": 1.3430302143096924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.646862506866455, "rewards_train/margins_1": 0.6145942211151123, "rewards_train/margins_2": 1.1734609603881836, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -176.4619140625, "logps_train/policy_1_l": -186.671142578125, "logps_train/policy_1_w": -118.85282897949219, "logps_train/policy_2_2": -140.89813232421875, "logps_train/policy_2_w": -166.96343994140625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.3944337368011475, "rewards_train/1-l": -1.7925057411193848, "rewards_train/1-w": 1.964716911315918, "rewards_train/2-2": 2.367218494415283, "rewards_train/2-w": 1.3942815065383911, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7572226524353027, "rewards_train/margins_1": 0.5702831745147705, "rewards_train/margins_2": 0.9729369878768921, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -105.64608764648438, "logps_train/policy_1_l": -56.479244232177734, "logps_train/policy_1_w": -82.83857727050781, "logps_train/policy_2_2": -81.30429077148438, "logps_train/policy_2_w": -110.14482879638672, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -50.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.923672080039978, "rewards_train/1-l": -0.6319089531898499, "rewards_train/1-w": 2.560283660888672, "rewards_train/2-2": 1.7617580890655518, "rewards_train/2-w": 2.073798656463623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1921926140785217, "rewards_train/margins_1": 1.6366115808486938, "rewards_train/margins_2": -0.3120405673980713, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -152.4866943359375, "logps_train/policy_1_l": -107.2571029663086, "logps_train/policy_1_w": -121.25013732910156, "logps_train/policy_2_2": -132.250732421875, "logps_train/policy_2_w": -142.73318481445312, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.7989866733551025, "rewards_train/1-l": -1.136452555656433, "rewards_train/1-w": 1.9044300317764282, "rewards_train/2-2": 2.309692859649658, "rewards_train/2-w": 1.226095199584961, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0408825874328613, "rewards_train/margins_1": 0.10544335842132568, "rewards_train/margins_2": 1.0835976600646973, "step": 202 }, { "epoch": 0.61, "logps_train/policy_1_2": -186.92481994628906, "logps_train/policy_1_l": -120.1306381225586, "logps_train/policy_1_w": -102.19969177246094, "logps_train/policy_2_2": -145.3078155517578, "logps_train/policy_2_w": -139.01815795898438, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.096191167831421, "rewards_train/1-l": -1.5097432136535645, "rewards_train/1-w": 2.5179214477539062, "rewards_train/2-2": 2.499345302581787, "rewards_train/2-w": 1.6470119953155518, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.027664661407471, "rewards_train/margins_1": 1.4217302799224854, "rewards_train/margins_2": 0.8523333072662354, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -140.9693145751953, "logps_train/policy_1_l": -96.57252502441406, "logps_train/policy_1_w": -152.6463623046875, "logps_train/policy_2_2": -113.2164306640625, "logps_train/policy_2_w": -184.70941162109375, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.015568494796753, "rewards_train/1-l": -0.7588149309158325, "rewards_train/1-w": 2.7144649028778076, "rewards_train/2-2": 1.567420244216919, "rewards_train/2-w": 1.650153398513794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.47327983379364, "rewards_train/margins_1": 1.6988964080810547, "rewards_train/margins_2": -0.082733154296875, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -120.6467056274414, "logps_train/policy_1_l": -113.82736206054688, "logps_train/policy_1_w": -84.5811767578125, "logps_train/policy_2_2": -97.47151184082031, "logps_train/policy_2_w": -107.4820556640625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.1228300333023071, "rewards_train/1-l": -1.1999237537384033, "rewards_train/1-w": 1.4723514318466187, "rewards_train/2-2": 1.8403489589691162, "rewards_train/2-w": 1.0135135650634766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.672275185585022, "rewards_train/margins_1": 0.3495213985443115, "rewards_train/margins_2": 0.8268353939056396, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -203.2698516845703, "logps_train/policy_1_l": -249.2530059814453, "logps_train/policy_1_w": -234.35235595703125, "logps_train/policy_2_2": -164.22811889648438, "logps_train/policy_2_w": -297.7318115234375, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 1.948014259338379, "rewards_train/1-l": -2.097174644470215, "rewards_train/1-w": 3.264763832092285, "rewards_train/2-2": 2.7084391117095947, "rewards_train/2-w": 1.095566987991333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.3619384765625, "rewards_train/margins_1": 1.3167495727539062, "rewards_train/margins_2": 1.6128721237182617, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -253.83363342285156, "logps_train/policy_1_l": -319.3211669921875, "logps_train/policy_1_w": -173.94375610351562, "logps_train/policy_2_2": -198.41055297851562, "logps_train/policy_2_w": -236.57981872558594, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -292.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.051011323928833, "rewards_train/1-l": -2.875084400177002, "rewards_train/1-w": 3.347810983657837, "rewards_train/2-2": 3.2745697498321533, "rewards_train/2-w": 1.901393175125122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.222895383834839, "rewards_train/margins_1": 1.296799659729004, "rewards_train/margins_2": 1.3731765747070312, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -136.30052185058594, "logps_train/policy_1_l": -176.54705810546875, "logps_train/policy_1_w": -116.23165893554688, "logps_train/policy_2_2": -109.38905334472656, "logps_train/policy_2_w": -139.45750427246094, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.9105724096298218, "rewards_train/1-l": -1.0105657577514648, "rewards_train/1-w": 2.479959011077881, "rewards_train/2-2": 1.48218834400177, "rewards_train/2-w": 1.6741719245910645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.4905247688293457, "rewards_train/margins_1": 1.569386601448059, "rewards_train/margins_2": -0.19198358058929443, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -148.37240600585938, "logps_train/policy_1_l": -142.6884765625, "logps_train/policy_1_w": -95.23626708984375, "logps_train/policy_2_2": -121.38619995117188, "logps_train/policy_2_w": -117.63383483886719, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 0.9373694062232971, "rewards_train/1-l": -1.4415029287338257, "rewards_train/1-w": 1.4242249727249146, "rewards_train/2-2": 1.6449739933013916, "rewards_train/2-w": 0.6739217042922974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8657279014587402, "rewards_train/margins_1": 0.48685556650161743, "rewards_train/margins_2": 0.9710522890090942, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -267.74554443359375, "logps_train/policy_1_l": -231.99716186523438, "logps_train/policy_1_w": -159.93878173828125, "logps_train/policy_2_2": -205.3551788330078, "logps_train/policy_2_w": -207.06594848632812, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.9004464149475098, "rewards_train/1-l": -2.502840995788574, "rewards_train/1-w": 3.1600284576416016, "rewards_train/2-2": 3.839482307434082, "rewards_train/2-w": 2.0590291023254395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.662869453430176, "rewards_train/margins_1": 1.2595820426940918, "rewards_train/margins_2": 1.7804532051086426, "step": 203 }, { "epoch": 0.61, "learning_rate": 4.169152406539933e-06, "loss": 0.6729, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -167.84490966796875, "logps_train/policy_1_l": -150.91038513183594, "logps_train/policy_1_w": -119.84201049804688, "logps_train/policy_2_2": -147.9355010986328, "logps_train/policy_2_w": -140.49081420898438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.6912895441055298, "rewards_train/1-l": -0.8621317744255066, "rewards_train/1-w": 1.4509550333023071, "rewards_train/2-2": 1.152543544769287, "rewards_train/2-w": 1.3884186744689941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3130868077278137, "rewards_train/margins_1": 0.7596654891967773, "rewards_train/margins_2": -0.23587512969970703, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -169.36846923828125, "logps_train/policy_1_l": -196.88851928710938, "logps_train/policy_1_w": -128.7830047607422, "logps_train/policy_2_2": -144.27638244628906, "logps_train/policy_2_w": -158.41763305664062, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.8928413391113281, "rewards_train/1-l": -1.6286952495574951, "rewards_train/1-w": 1.6530463695526123, "rewards_train/2-2": 2.7286124229431152, "rewards_train/2-w": 1.2308937311172485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2817416191101074, "rewards_train/margins_1": -0.23979496955871582, "rewards_train/margins_2": 1.4977186918258667, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -128.61346435546875, "logps_train/policy_1_l": -97.41973876953125, "logps_train/policy_1_w": -125.86569213867188, "logps_train/policy_2_2": -111.44453430175781, "logps_train/policy_2_w": -153.330078125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.9050601124763489, "rewards_train/1-l": -1.4409972429275513, "rewards_train/1-w": 3.7118682861328125, "rewards_train/2-2": 1.4176554679870605, "rewards_train/2-w": 2.491992473602295, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.152865529060364, "rewards_train/margins_1": 2.8068081736564636, "rewards_train/margins_2": -1.0743370056152344, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -143.0124969482422, "logps_train/policy_1_l": -111.23692321777344, "logps_train/policy_1_w": -105.79069519042969, "logps_train/policy_2_2": -111.34381866455078, "logps_train/policy_2_w": -134.94952392578125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.0768749713897705, "rewards_train/1-l": -0.14791156351566315, "rewards_train/1-w": 1.8263992071151733, "rewards_train/2-2": 2.0706958770751953, "rewards_train/2-w": 1.1425477266311646, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9743107706308365, "rewards_train/margins_1": 0.7495242357254028, "rewards_train/margins_2": 0.9281481504440308, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -244.10116577148438, "logps_train/policy_1_l": -138.07859802246094, "logps_train/policy_1_w": -160.55126953125, "logps_train/policy_2_2": -197.0991668701172, "logps_train/policy_2_w": -195.56710815429688, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.049648404121399, "rewards_train/1-l": -0.9947740435600281, "rewards_train/1-w": 2.996044635772705, "rewards_train/2-2": 2.660200595855713, "rewards_train/2-w": 1.9469985961914062, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.990818679332733, "rewards_train/margins_1": 1.9463962316513062, "rewards_train/margins_2": 0.7132019996643066, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -169.19216918945312, "logps_train/policy_1_l": -185.3419647216797, "logps_train/policy_1_w": -111.36260986328125, "logps_train/policy_2_2": -128.44398498535156, "logps_train/policy_2_w": -139.93426513671875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.427657961845398, "rewards_train/1-l": -1.5720868110656738, "rewards_train/1-w": 1.9606138467788696, "rewards_train/2-2": 2.5462262630462646, "rewards_train/2-w": 1.630010724067688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5327006578445435, "rewards_train/margins_1": 0.5329558849334717, "rewards_train/margins_2": 0.9162155389785767, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -139.71652221679688, "logps_train/policy_1_l": -223.53485107421875, "logps_train/policy_1_w": -115.9722900390625, "logps_train/policy_2_2": -109.38235473632812, "logps_train/policy_2_w": -158.01309204101562, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.3439724445343018, "rewards_train/1-l": -1.4593429565429688, "rewards_train/1-w": 1.599646806716919, "rewards_train/2-2": 1.7629374265670776, "rewards_train/2-w": 0.8893165588378906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0589897632598877, "rewards_train/margins_1": 0.2556743621826172, "rewards_train/margins_2": 0.873620867729187, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -85.53192138671875, "logps_train/policy_1_l": -125.71005249023438, "logps_train/policy_1_w": -87.27518463134766, "logps_train/policy_2_2": -70.51383209228516, "logps_train/policy_2_w": -110.7242202758789, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.7514947652816772, "rewards_train/1-l": -1.6183693408966064, "rewards_train/1-w": 1.9881062507629395, "rewards_train/2-2": 1.3320157527923584, "rewards_train/2-w": 1.4369534254074097, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.606475591659546, "rewards_train/margins_1": 1.2366114854812622, "rewards_train/margins_2": -0.10493767261505127, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -131.869384765625, "logps_train/policy_1_l": -148.5994110107422, "logps_train/policy_1_w": -103.22799682617188, "logps_train/policy_2_2": -100.95689392089844, "logps_train/policy_2_w": -143.00518798828125, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.0943106412887573, "rewards_train/1-l": -1.1833784580230713, "rewards_train/1-w": 2.2295444011688232, "rewards_train/2-2": 1.60118567943573, "rewards_train/2-w": 1.1916677951812744, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4129228591918945, "rewards_train/margins_1": 1.135233759880066, "rewards_train/margins_2": 0.40951788425445557, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -169.96804809570312, "logps_train/policy_1_l": -165.19082641601562, "logps_train/policy_1_w": -174.32850646972656, "logps_train/policy_2_2": -135.4374237060547, "logps_train/policy_2_w": -214.38392639160156, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.6141321659088135, "rewards_train/1-l": -1.147208333015442, "rewards_train/1-w": 2.3222270011901855, "rewards_train/2-2": 2.4836020469665527, "rewards_train/2-w": 1.520202398300171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4694353342056274, "rewards_train/margins_1": 0.7080948352813721, "rewards_train/margins_2": 0.9633996486663818, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -42.920318603515625, "logps_train/policy_1_l": -63.2393684387207, "logps_train/policy_1_w": -74.7326431274414, "logps_train/policy_2_2": -31.822763442993164, "logps_train/policy_2_w": -93.14932250976562, "logps_train/ref_1_2": -47.75, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -38.5, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.47945255041122437, "rewards_train/1-l": -0.787999153137207, "rewards_train/1-w": 1.1514427661895752, "rewards_train/2-2": 0.6823720932006836, "rewards_train/2-w": 0.3477628827095032, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9394419193267822, "rewards_train/margins_1": 0.6719902157783508, "rewards_train/margins_2": 0.3346092104911804, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -129.27162170410156, "logps_train/policy_1_l": -141.85211181640625, "logps_train/policy_1_w": -86.83741760253906, "logps_train/policy_2_2": -111.89413452148438, "logps_train/policy_2_w": -101.2577133178711, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 2.035728693008423, "rewards_train/1-l": -1.0633361339569092, "rewards_train/1-w": 1.6928205490112305, "rewards_train/2-2": 2.270742893218994, "rewards_train/2-w": 1.4242289066314697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7561566829681396, "rewards_train/margins_1": -0.3429081439971924, "rewards_train/margins_2": 0.8465139865875244, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -224.00418090820312, "logps_train/policy_1_l": -270.8719482421875, "logps_train/policy_1_w": -150.3978729248047, "logps_train/policy_2_2": -180.52212524414062, "logps_train/policy_2_w": -186.21632385253906, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.218332290649414, "rewards_train/1-l": -3.216492176055908, "rewards_train/1-w": 2.1430258750915527, "rewards_train/2-2": 3.5165367126464844, "rewards_train/2-w": 1.7549296617507935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.359518051147461, "rewards_train/margins_1": -0.07530641555786133, "rewards_train/margins_2": 1.761607050895691, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -178.1715087890625, "logps_train/policy_1_l": -197.86767578125, "logps_train/policy_1_w": -177.93728637695312, "logps_train/policy_2_2": -138.85916137695312, "logps_train/policy_2_w": -237.12225341796875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.6109756231307983, "rewards_train/1-l": -0.6105964183807373, "rewards_train/1-w": 2.781270980834961, "rewards_train/2-2": 2.435957431793213, "rewards_train/2-w": 1.2627735137939453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3918673992156982, "rewards_train/margins_1": 1.1702953577041626, "rewards_train/margins_2": 1.1731839179992676, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -144.5259246826172, "logps_train/policy_1_l": -140.97039794921875, "logps_train/policy_1_w": -112.35025787353516, "logps_train/policy_2_2": -120.8114013671875, "logps_train/policy_2_w": -136.25210571289062, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.483735203742981, "rewards_train/1-l": -1.274774432182312, "rewards_train/1-w": 2.346224308013916, "rewards_train/2-2": 2.4588987827301025, "rewards_train/2-w": 1.4560401439666748, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.620998740196228, "rewards_train/margins_1": 0.8624891042709351, "rewards_train/margins_2": 1.0028586387634277, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -76.73464965820312, "logps_train/policy_1_l": -109.03611755371094, "logps_train/policy_1_w": -61.52339553833008, "logps_train/policy_2_2": -54.55378723144531, "logps_train/policy_2_w": -90.60847473144531, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 0.6160855293273926, "rewards_train/1-l": -1.4611799716949463, "rewards_train/1-w": 1.4128947257995605, "rewards_train/2-2": 1.068009853363037, "rewards_train/2-w": 0.6993089914321899, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.874074697494507, "rewards_train/margins_1": 0.796809196472168, "rewards_train/margins_2": 0.36870086193084717, "step": 205 }, { "epoch": 0.62, "learning_rate": 4.150684402951994e-06, "loss": 0.8454, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -112.32986450195312, "logps_train/policy_1_l": -159.8706817626953, "logps_train/policy_1_w": -150.2228546142578, "logps_train/policy_2_2": -91.6583251953125, "logps_train/policy_2_w": -180.92803955078125, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.448263168334961, "rewards_train/1-l": -1.8571853637695312, "rewards_train/1-w": 3.547245979309082, "rewards_train/2-2": 2.0759644508361816, "rewards_train/2-w": 2.5868842601776123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.404431343078613, "rewards_train/margins_1": 2.098982810974121, "rewards_train/margins_2": -0.5109198093414307, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -162.30569458007812, "logps_train/policy_1_l": -194.0222625732422, "logps_train/policy_1_w": -173.18173217773438, "logps_train/policy_2_2": -135.81631469726562, "logps_train/policy_2_w": -214.32681274414062, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.8850550651550293, "rewards_train/1-l": -1.5672653913497925, "rewards_train/1-w": 3.158388137817383, "rewards_train/2-2": 2.720320701599121, "rewards_train/2-w": 1.7177101373672485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.725653529167175, "rewards_train/margins_1": 1.2733330726623535, "rewards_train/margins_2": 1.0026105642318726, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -106.59426879882812, "logps_train/policy_1_l": -109.32645416259766, "logps_train/policy_1_w": -99.65959167480469, "logps_train/policy_2_2": -89.03330993652344, "logps_train/policy_2_w": -119.39955139160156, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.2561981678009033, "rewards_train/1-l": -0.8941687345504761, "rewards_train/1-w": 1.8664628267288208, "rewards_train/2-2": 1.713074803352356, "rewards_train/2-w": 1.2276227474212646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.760631561279297, "rewards_train/margins_1": 0.6102646589279175, "rewards_train/margins_2": 0.4854520559310913, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -127.83397674560547, "logps_train/policy_1_l": -77.50057983398438, "logps_train/policy_1_w": -106.20639038085938, "logps_train/policy_2_2": -103.68862915039062, "logps_train/policy_2_w": -127.05606842041016, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -70.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.2181648015975952, "rewards_train/1-l": -0.7131443023681641, "rewards_train/1-w": 1.7988922595977783, "rewards_train/2-2": 2.0670742988586426, "rewards_train/2-w": 0.9881423115730286, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5120365619659424, "rewards_train/margins_1": 0.5807274580001831, "rewards_train/margins_2": 1.078931987285614, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -135.85211181640625, "logps_train/policy_1_l": -160.46640014648438, "logps_train/policy_1_w": -128.47927856445312, "logps_train/policy_2_2": -111.86946105957031, "logps_train/policy_2_w": -157.01426696777344, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.1206488609313965, "rewards_train/1-l": -1.3639676570892334, "rewards_train/1-w": 2.50036883354187, "rewards_train/2-2": 1.880631923675537, "rewards_train/2-w": 1.4186903238296509, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8643364906311035, "rewards_train/margins_1": 1.3797199726104736, "rewards_train/margins_2": 0.46194159984588623, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -80.3272933959961, "logps_train/policy_1_l": -96.98313903808594, "logps_train/policy_1_w": -95.97651672363281, "logps_train/policy_2_2": -62.85005187988281, "logps_train/policy_2_w": -106.18392944335938, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.8860204219818115, "rewards_train/1-l": -0.9541730284690857, "rewards_train/1-w": 1.7656290531158447, "rewards_train/2-2": 1.2673383951187134, "rewards_train/2-w": 1.4394197463989258, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7198020815849304, "rewards_train/margins_1": 0.8796086311340332, "rewards_train/margins_2": -0.1720813512802124, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -183.59149169921875, "logps_train/policy_1_l": -216.63087463378906, "logps_train/policy_1_w": -139.5468292236328, "logps_train/policy_2_2": -147.41094970703125, "logps_train/policy_2_w": -179.64044189453125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.865851879119873, "rewards_train/1-l": -1.4185566902160645, "rewards_train/1-w": 2.889066696166992, "rewards_train/2-2": 2.524529457092285, "rewards_train/2-w": 2.1132993698120117, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.307623386383057, "rewards_train/margins_1": 1.0232148170471191, "rewards_train/margins_2": 0.41123008728027344, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -147.8570098876953, "logps_train/policy_1_l": -156.34335327148438, "logps_train/policy_1_w": -121.42012786865234, "logps_train/policy_2_2": -133.26881408691406, "logps_train/policy_2_w": -141.58685302734375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.1830482482910156, "rewards_train/1-l": -0.9548435211181641, "rewards_train/1-w": 2.2372841835021973, "rewards_train/2-2": 1.3844473361968994, "rewards_train/2-w": 1.7122135162353516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.1921277046203613, "rewards_train/margins_1": 1.0542359352111816, "rewards_train/margins_2": -0.32776618003845215, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -153.79373168945312, "logps_train/policy_1_l": -108.1059341430664, "logps_train/policy_1_w": -72.76213073730469, "logps_train/policy_2_2": -128.593505859375, "logps_train/policy_2_w": -95.48527526855469, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.6456257104873657, "rewards_train/1-l": -1.6398909091949463, "rewards_train/1-w": 1.2937091588974, "rewards_train/2-2": 2.543775796890259, "rewards_train/2-w": 0.5241293907165527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.933600068092346, "rewards_train/margins_1": -0.3519165515899658, "rewards_train/margins_2": 2.019646406173706, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -99.83082580566406, "logps_train/policy_1_l": -76.52627563476562, "logps_train/policy_1_w": -67.75238800048828, "logps_train/policy_2_2": -64.1996078491211, "logps_train/policy_2_w": -101.53489685058594, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 0.993870735168457, "rewards_train/1-l": -0.8299708366394043, "rewards_train/1-w": 1.2255425453186035, "rewards_train/2-2": 2.1198830604553223, "rewards_train/2-w": 0.3472915291786194, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.055513381958008, "rewards_train/margins_1": 0.23167181015014648, "rewards_train/margins_2": 1.7725915312767029, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -134.47439575195312, "logps_train/policy_1_l": -132.39488220214844, "logps_train/policy_1_w": -156.2952880859375, "logps_train/policy_2_2": -111.61468505859375, "logps_train/policy_2_w": -185.80523681640625, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.3556851148605347, "rewards_train/1-l": -0.7685903310775757, "rewards_train/1-w": 2.1985952854156494, "rewards_train/2-2": 1.7939997911453247, "rewards_train/2-w": 1.5554134845733643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.967185616493225, "rewards_train/margins_1": 0.8429101705551147, "rewards_train/margins_2": 0.23858630657196045, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -165.302734375, "logps_train/policy_1_l": -115.99520874023438, "logps_train/policy_1_w": -111.64865112304688, "logps_train/policy_2_2": -140.14862060546875, "logps_train/policy_2_w": -135.34637451171875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.6650389432907104, "rewards_train/1-l": -0.6696869134902954, "rewards_train/1-w": 1.9715607166290283, "rewards_train/2-2": 2.532012939453125, "rewards_train/2-w": 0.9930000305175781, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6412476301193237, "rewards_train/margins_1": 0.30652177333831787, "rewards_train/margins_2": 1.5390129089355469, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -138.7984619140625, "logps_train/policy_1_l": -101.22151947021484, "logps_train/policy_1_w": -90.37826538085938, "logps_train/policy_2_2": -118.0267333984375, "logps_train/policy_2_w": -107.96931457519531, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 2.0814828872680664, "rewards_train/1-l": -1.0836751461029053, "rewards_train/1-w": 1.9561185836791992, "rewards_train/2-2": 2.4887328147888184, "rewards_train/2-w": 1.371622920036316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0397937297821045, "rewards_train/margins_1": -0.1253643035888672, "rewards_train/margins_2": 1.1171098947525024, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -136.6744384765625, "logps_train/policy_1_l": -122.24896240234375, "logps_train/policy_1_w": -75.26268005371094, "logps_train/policy_2_2": -96.29350280761719, "logps_train/policy_2_w": -96.75164794921875, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.0700565576553345, "rewards_train/1-l": -1.2225534915924072, "rewards_train/1-w": 1.8018577098846436, "rewards_train/2-2": 2.0284624099731445, "rewards_train/2-w": 1.46467924118042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.024411201477051, "rewards_train/margins_1": 0.7318011522293091, "rewards_train/margins_2": 0.5637831687927246, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -94.1766586303711, "logps_train/policy_1_l": -106.4543685913086, "logps_train/policy_1_w": -108.32703399658203, "logps_train/policy_2_2": -81.29885864257812, "logps_train/policy_2_w": -121.00750732421875, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.951475203037262, "rewards_train/1-l": -1.1536399126052856, "rewards_train/1-w": 1.5378046035766602, "rewards_train/2-2": 1.2150357961654663, "rewards_train/2-w": 1.2201480865478516, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.691444516181946, "rewards_train/margins_1": 0.5863294005393982, "rewards_train/margins_2": -0.005112290382385254, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -127.99747467041016, "logps_train/policy_1_l": -123.88328552246094, "logps_train/policy_1_w": -111.41062927246094, "logps_train/policy_2_2": -107.20404815673828, "logps_train/policy_2_w": -128.4058837890625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.4377524852752686, "rewards_train/1-l": -1.1758288145065308, "rewards_train/1-w": 2.2370617389678955, "rewards_train/2-2": 2.0983452796936035, "rewards_train/2-w": 1.7422256469726562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4128905534744263, "rewards_train/margins_1": 0.799309253692627, "rewards_train/margins_2": 0.35611963272094727, "step": 207 }, { "epoch": 0.62, "learning_rate": 4.132055295476304e-06, "loss": 0.8576, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -130.65921020507812, "logps_train/policy_1_l": -138.8429718017578, "logps_train/policy_1_w": -117.99491882324219, "logps_train/policy_2_2": -111.01554870605469, "logps_train/policy_2_w": -131.86785888671875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.0059542655944824, "rewards_train/1-l": -1.0952351093292236, "rewards_train/1-w": 3.0520706176757812, "rewards_train/2-2": 2.379695177078247, "rewards_train/2-w": 2.8132152557373047, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.147305727005005, "rewards_train/margins_1": 1.0461163520812988, "rewards_train/margins_2": -0.4335200786590576, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -165.49362182617188, "logps_train/policy_1_l": -153.586181640625, "logps_train/policy_1_w": -116.75888061523438, "logps_train/policy_2_2": -146.6573028564453, "logps_train/policy_2_w": -137.6011962890625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.3912620544433594, "rewards_train/1-l": -1.1794203519821167, "rewards_train/1-w": 1.470205545425415, "rewards_train/2-2": 1.8319259881973267, "rewards_train/2-w": 0.7570688128471375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6496258974075317, "rewards_train/margins_1": 0.07894349098205566, "rewards_train/margins_2": 1.0748571753501892, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -122.79713439941406, "logps_train/policy_1_l": -111.4393310546875, "logps_train/policy_1_w": -100.86077117919922, "logps_train/policy_2_2": -96.0282211303711, "logps_train/policy_2_w": -126.81695556640625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9109119772911072, "rewards_train/1-l": -1.3552618026733398, "rewards_train/1-w": 2.4811103343963623, "rewards_train/2-2": 1.814365267753601, "rewards_train/2-w": 1.6651802062988281, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.836372137069702, "rewards_train/margins_1": 1.5701983571052551, "rewards_train/margins_2": 0.14918506145477295, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -143.70538330078125, "logps_train/policy_1_l": -141.34732055664062, "logps_train/policy_1_w": -104.94651794433594, "logps_train/policy_2_2": -112.86553955078125, "logps_train/policy_2_w": -131.8444366455078, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.0825856924057007, "rewards_train/1-l": -1.4581692218780518, "rewards_train/1-w": 1.5067156553268433, "rewards_train/2-2": 1.88844633102417, "rewards_train/2-w": 0.9304008483886719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.964884877204895, "rewards_train/margins_1": 0.4241299629211426, "rewards_train/margins_2": 0.958045482635498, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -138.17303466796875, "logps_train/policy_1_l": -233.63037109375, "logps_train/policy_1_w": -273.48577880859375, "logps_train/policy_2_2": -114.42958068847656, "logps_train/policy_2_w": -306.3079833984375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -274.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -300.0, "rewards_train/1-2": 2.2299628257751465, "rewards_train/1-l": -1.3989747762680054, "rewards_train/1-w": 0.054544925689697266, "rewards_train/2-2": 2.6914167404174805, "rewards_train/2-w": -0.6807984113693237, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.4535197019577026, "rewards_train/margins_1": -2.175417900085449, "rewards_train/margins_2": 3.372215151786804, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -202.4282684326172, "logps_train/policy_1_l": -181.70022583007812, "logps_train/policy_1_w": -179.7428741455078, "logps_train/policy_2_2": -158.92938232421875, "logps_train/policy_2_w": -214.3707733154297, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.0571722984313965, "rewards_train/1-l": -1.2349920272827148, "rewards_train/1-w": 2.626103162765503, "rewards_train/2-2": 3.113311290740967, "rewards_train/2-w": 1.9183924198150635, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8610951900482178, "rewards_train/margins_1": 0.5689308643341064, "rewards_train/margins_2": 1.1949188709259033, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -108.69012451171875, "logps_train/policy_1_l": -53.172691345214844, "logps_train/policy_1_w": -91.1253662109375, "logps_train/policy_2_2": -84.36540222167969, "logps_train/policy_2_w": -107.77574920654297, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -49.75, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.9153621196746826, "rewards_train/1-l": -0.3599209189414978, "rewards_train/1-w": 1.4694948196411133, "rewards_train/2-2": 1.3958812952041626, "rewards_train/2-w": 1.0325815677642822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.829415738582611, "rewards_train/margins_1": 0.5541326999664307, "rewards_train/margins_2": 0.36329972743988037, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -225.6314239501953, "logps_train/policy_1_l": -138.92965698242188, "logps_train/policy_1_w": -211.7039794921875, "logps_train/policy_2_2": -187.61813354492188, "logps_train/policy_2_w": -248.4869384765625, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.1321699619293213, "rewards_train/1-l": -0.7511683702468872, "rewards_train/1-w": 2.4108524322509766, "rewards_train/2-2": 2.1991255283355713, "rewards_train/2-w": 1.191930890083313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1620208024978638, "rewards_train/margins_1": 1.2786824703216553, "rewards_train/margins_2": 1.0071946382522583, "step": 208 }, { "epoch": 0.63, "logps_train/policy_1_2": -162.760009765625, "logps_train/policy_1_l": -150.80654907226562, "logps_train/policy_1_w": -88.4383773803711, "logps_train/policy_2_2": -131.05941772460938, "logps_train/policy_2_w": -113.10057067871094, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.4880623817443848, "rewards_train/1-l": -0.9665927886962891, "rewards_train/1-w": 1.6311614513397217, "rewards_train/2-2": 2.513395071029663, "rewards_train/2-w": 1.022754430770874, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.5977542400360107, "rewards_train/margins_1": 0.14309906959533691, "rewards_train/margins_2": 1.490640640258789, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -203.66343688964844, "logps_train/policy_1_l": -174.24473571777344, "logps_train/policy_1_w": -207.7444610595703, "logps_train/policy_2_2": -178.80355834960938, "logps_train/policy_2_w": -239.7643280029297, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.3274056911468506, "rewards_train/1-l": -0.8244732618331909, "rewards_train/1-w": 2.866180419921875, "rewards_train/2-2": 2.2008934020996094, "rewards_train/2-w": 1.9673190116882324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.690653681755066, "rewards_train/margins_1": 1.5387747287750244, "rewards_train/margins_2": 0.23357439041137695, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -173.2731475830078, "logps_train/policy_1_l": -194.11520385742188, "logps_train/policy_1_w": -204.27777099609375, "logps_train/policy_2_2": -151.32151794433594, "logps_train/policy_2_w": -241.40206909179688, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.9164354801177979, "rewards_train/1-l": -1.4083962440490723, "rewards_train/1-w": 3.5605037212371826, "rewards_train/2-2": 2.533473014831543, "rewards_train/2-w": 2.2426061630249023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.968899965286255, "rewards_train/margins_1": 1.6440682411193848, "rewards_train/margins_2": 0.2908668518066406, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -69.50424194335938, "logps_train/policy_1_l": -76.15882873535156, "logps_train/policy_1_w": -66.52759552001953, "logps_train/policy_2_2": -54.200042724609375, "logps_train/policy_2_w": -88.75730895996094, "logps_train/ref_1_2": -79.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -68.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.9573880434036255, "rewards_train/1-l": -0.33853888511657715, "rewards_train/1-w": 1.6045645475387573, "rewards_train/2-2": 1.42374587059021, "rewards_train/2-w": 0.9828633069992065, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9431034326553345, "rewards_train/margins_1": 0.6471765041351318, "rewards_train/margins_2": 0.4408825635910034, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -64.27108764648438, "logps_train/policy_1_l": -59.67937088012695, "logps_train/policy_1_w": -49.003841400146484, "logps_train/policy_2_2": -52.173492431640625, "logps_train/policy_2_w": -60.851497650146484, "logps_train/ref_1_2": -78.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -59.25, "logps_train/ref_2_2": -68.5, "logps_train/ref_2_w": -67.0, "rewards_train/1-2": 1.4228917360305786, "rewards_train/1-l": -0.7585622072219849, "rewards_train/1-w": 1.038287878036499, "rewards_train/2-2": 1.66390061378479, "rewards_train/2-w": 0.5789129734039307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7968500852584839, "rewards_train/margins_1": -0.3846038579940796, "rewards_train/margins_2": 1.0849876403808594, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -71.79399108886719, "logps_train/policy_1_l": -55.04302978515625, "logps_train/policy_1_w": -53.52717208862305, "logps_train/policy_2_2": -54.10047912597656, "logps_train/policy_2_w": -68.52449035644531, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -48.0, "logps_train/ref_1_w": -63.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -76.0, "rewards_train/1-2": 1.0228309631347656, "rewards_train/1-l": -0.6982393264770508, "rewards_train/1-w": 0.9519703388214111, "rewards_train/2-2": 1.6240769624710083, "rewards_train/2-w": 0.7477459907531738, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.650209665298462, "rewards_train/margins_1": -0.07086062431335449, "rewards_train/margins_2": 0.8763309717178345, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -149.4147186279297, "logps_train/policy_1_l": -171.32672119140625, "logps_train/policy_1_w": -156.6338348388672, "logps_train/policy_2_2": -129.51080322265625, "logps_train/policy_2_w": -190.5063934326172, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5386065244674683, "rewards_train/1-l": -1.2244679927825928, "rewards_train/1-w": 2.4686477184295654, "rewards_train/2-2": 1.8887641429901123, "rewards_train/2-w": 1.197016716003418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.693115711212158, "rewards_train/margins_1": 0.9300411939620972, "rewards_train/margins_2": 0.6917474269866943, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -187.00701904296875, "logps_train/policy_1_l": -144.14170837402344, "logps_train/policy_1_w": -106.03463745117188, "logps_train/policy_2_2": -154.4089813232422, "logps_train/policy_2_w": -123.65869140625, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 2.1664857864379883, "rewards_train/1-l": -0.8520614504814148, "rewards_train/1-w": 2.084818124771118, "rewards_train/2-2": 3.0716023445129395, "rewards_train/2-w": 1.6653813123703003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.936879575252533, "rewards_train/margins_1": -0.08166766166687012, "rewards_train/margins_2": 1.4062210321426392, "step": 209 }, { "epoch": 0.63, "learning_rate": 4.113266902280914e-06, "loss": 1.213, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -194.44818115234375, "logps_train/policy_1_l": -157.57763671875, "logps_train/policy_1_w": -117.08256530761719, "logps_train/policy_2_2": -158.62350463867188, "logps_train/policy_2_w": -151.8249969482422, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.3458081483840942, "rewards_train/1-l": -1.4421396255493164, "rewards_train/1-w": 2.0304155349731445, "rewards_train/2-2": 2.4189000129699707, "rewards_train/2-w": 1.318280816078186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.472555160522461, "rewards_train/margins_1": 0.6846073865890503, "rewards_train/margins_2": 1.1006191968917847, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -204.88758850097656, "logps_train/policy_1_l": -136.3980712890625, "logps_train/policy_1_w": -142.82362365722656, "logps_train/policy_2_2": -159.2660675048828, "logps_train/policy_2_w": -182.89007568359375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.161241054534912, "rewards_train/1-l": -1.6070926189422607, "rewards_train/1-w": 3.1192007064819336, "rewards_train/2-2": 3.065579891204834, "rewards_train/2-w": 1.9813045263290405, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.726293325424194, "rewards_train/margins_1": 1.9579596519470215, "rewards_train/margins_2": 1.0842753648757935, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -63.49850082397461, "logps_train/policy_1_l": -114.50823974609375, "logps_train/policy_1_w": -79.57882690429688, "logps_train/policy_2_2": -46.723751068115234, "logps_train/policy_2_w": -110.61878204345703, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -62.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.2407748699188232, "rewards_train/1-l": -1.3672301769256592, "rewards_train/1-w": 1.8010045289993286, "rewards_train/2-2": 1.5246217250823975, "rewards_train/2-w": 0.8099969625473022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.168234705924988, "rewards_train/margins_1": 0.5602296590805054, "rewards_train/margins_2": 0.7146247625350952, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -115.82769775390625, "logps_train/policy_1_l": -134.9366455078125, "logps_train/policy_1_w": -133.76950073242188, "logps_train/policy_2_2": -94.24639892578125, "logps_train/policy_2_w": -175.11546325683594, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.4297304153442383, "rewards_train/1-l": -1.6186649799346924, "rewards_train/1-w": 2.318362236022949, "rewards_train/2-2": 1.9534857273101807, "rewards_train/2-w": 0.7962662577629089, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9370272159576416, "rewards_train/margins_1": 0.8886318206787109, "rewards_train/margins_2": 1.1572194695472717, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -118.57600402832031, "logps_train/policy_1_l": -139.47256469726562, "logps_train/policy_1_w": -103.06980895996094, "logps_train/policy_2_2": -93.27430725097656, "logps_train/policy_2_w": -129.51077270507812, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.7420083284378052, "rewards_train/1-l": -1.0611236095428467, "rewards_train/1-w": 1.4201675653457642, "rewards_train/2-2": 1.3536248207092285, "rewards_train/2-w": 0.6465787291526794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.481291174888611, "rewards_train/margins_1": 0.678159236907959, "rewards_train/margins_2": 0.7070460915565491, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -251.9794921875, "logps_train/policy_1_l": -231.67095947265625, "logps_train/policy_1_w": -200.1079559326172, "logps_train/policy_2_2": -200.1163787841797, "logps_train/policy_2_w": -236.5803985595703, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -233.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.8645505905151367, "rewards_train/1-l": -1.9217826128005981, "rewards_train/1-w": 3.506391763687134, "rewards_train/2-2": 3.338362216949463, "rewards_train/2-w": 1.9044599533081055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.428174376487732, "rewards_train/margins_1": 1.641841173171997, "rewards_train/margins_2": 1.4339022636413574, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -168.96170043945312, "logps_train/policy_1_l": -79.34565734863281, "logps_train/policy_1_w": -121.64839172363281, "logps_train/policy_2_2": -140.06295776367188, "logps_train/policy_2_w": -143.98081970214844, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.7217993140220642, "rewards_train/1-l": -0.23026880621910095, "rewards_train/1-w": 1.815483808517456, "rewards_train/2-2": 1.6806186437606812, "rewards_train/2-w": 0.9309948086738586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.045752614736557, "rewards_train/margins_1": 1.0936844944953918, "rewards_train/margins_2": 0.7496238350868225, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -137.46861267089844, "logps_train/policy_1_l": -134.35928344726562, "logps_train/policy_1_w": -83.9180679321289, "logps_train/policy_2_2": -111.95204162597656, "logps_train/policy_2_w": -98.29283905029297, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 0.5031383633613586, "rewards_train/1-l": -1.4410057067871094, "rewards_train/1-w": 1.3667874336242676, "rewards_train/2-2": 1.324326515197754, "rewards_train/2-w": 0.9707155823707581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.807793140411377, "rewards_train/margins_1": 0.8636490702629089, "rewards_train/margins_2": 0.35361093282699585, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -163.32577514648438, "logps_train/policy_1_l": -245.3316650390625, "logps_train/policy_1_w": -143.003662109375, "logps_train/policy_2_2": -121.85639953613281, "logps_train/policy_2_w": -193.3279571533203, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.0529690980911255, "rewards_train/1-l": -2.8456664085388184, "rewards_train/1-w": 2.7418205738067627, "rewards_train/2-2": 2.1764698028564453, "rewards_train/2-w": 2.035954475402832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.587486982345581, "rewards_train/margins_1": 1.6888514757156372, "rewards_train/margins_2": 0.14051532745361328, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -163.4895477294922, "logps_train/policy_1_l": -158.2583770751953, "logps_train/policy_1_w": -179.20852661132812, "logps_train/policy_2_2": -127.94756317138672, "logps_train/policy_2_w": -218.96646118164062, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.649482250213623, "rewards_train/1-l": -1.6335536241531372, "rewards_train/1-w": 3.304147720336914, "rewards_train/2-2": 2.592742919921875, "rewards_train/2-w": 1.8408554792404175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.937701344490051, "rewards_train/margins_1": 1.654665470123291, "rewards_train/margins_2": 0.7518874406814575, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -194.83045959472656, "logps_train/policy_1_l": -147.57183837890625, "logps_train/policy_1_w": -122.06636810302734, "logps_train/policy_2_2": -165.96694946289062, "logps_train/policy_2_w": -159.9215087890625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.907578706741333, "rewards_train/1-l": -1.8782777786254883, "rewards_train/1-w": 2.556253671646118, "rewards_train/2-2": 2.945101737976074, "rewards_train/2-w": 1.3412485122680664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4345314502716064, "rewards_train/margins_1": 0.6486749649047852, "rewards_train/margins_2": 1.6038532257080078, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -92.69377899169922, "logps_train/policy_1_l": -64.84912109375, "logps_train/policy_1_w": -66.44412994384766, "logps_train/policy_2_2": -66.09783935546875, "logps_train/policy_2_w": -89.79582214355469, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -58.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.9837473630905151, "rewards_train/1-l": -0.691162109375, "rewards_train/1-w": 1.6819541454315186, "rewards_train/2-2": 1.5620911121368408, "rewards_train/2-w": 0.9038158655166626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3731162548065186, "rewards_train/margins_1": 0.6982067823410034, "rewards_train/margins_2": 0.6582752466201782, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -234.1306610107422, "logps_train/policy_1_l": -255.17327880859375, "logps_train/policy_1_w": -261.57470703125, "logps_train/policy_2_2": -178.45648193359375, "logps_train/policy_2_w": -324.70367431640625, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -304.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -340.0, "rewards_train/1-2": 0.749433696269989, "rewards_train/1-l": -3.5735769271850586, "rewards_train/1-w": 4.20503044128418, "rewards_train/2-2": 2.991852283477783, "rewards_train/2-w": 1.6655704975128174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.778607368469238, "rewards_train/margins_1": 3.4555967450141907, "rewards_train/margins_2": 1.3262817859649658, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -137.83660888671875, "logps_train/policy_1_l": -233.03448486328125, "logps_train/policy_1_w": -60.54359817504883, "logps_train/policy_2_2": -113.59619140625, "logps_train/policy_2_w": -78.75492858886719, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -69.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 0.7530574798583984, "rewards_train/1-l": -2.9706361293792725, "rewards_train/1-w": 0.9219098091125488, "rewards_train/2-2": 1.5485838651657104, "rewards_train/2-w": 0.6215776801109314, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8925459384918213, "rewards_train/margins_1": 0.1688523292541504, "rewards_train/margins_2": 0.927006185054779, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -124.73674011230469, "logps_train/policy_1_l": -155.37823486328125, "logps_train/policy_1_w": -119.39248657226562, "logps_train/policy_2_2": -83.06257629394531, "logps_train/policy_2_w": -168.23483276367188, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.9747636914253235, "rewards_train/1-l": -1.1852834224700928, "rewards_train/1-w": 1.9790122509002686, "rewards_train/2-2": 2.112492322921753, "rewards_train/2-w": 0.984524130821228, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1642956733703613, "rewards_train/margins_1": 1.004248559474945, "rewards_train/margins_2": 1.127968192100525, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -162.11192321777344, "logps_train/policy_1_l": -146.6258544921875, "logps_train/policy_1_w": -82.41917419433594, "logps_train/policy_2_2": -128.40211486816406, "logps_train/policy_2_w": -96.76974487304688, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.927870512008667, "rewards_train/1-l": -1.2621943950653076, "rewards_train/1-w": 1.7698016166687012, "rewards_train/2-2": 2.254319429397583, "rewards_train/2-w": 1.2804484367370605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.031996011734009, "rewards_train/margins_1": 0.8419311046600342, "rewards_train/margins_2": 0.9738709926605225, "step": 211 }, { "epoch": 0.63, "learning_rate": 4.094321057079874e-06, "loss": 0.7339, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -187.45169067382812, "logps_train/policy_1_l": -164.76205444335938, "logps_train/policy_1_w": -171.12005615234375, "logps_train/policy_2_2": -150.1531982421875, "logps_train/policy_2_w": -232.2731475830078, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.6360805034637451, "rewards_train/1-l": -1.4721041917800903, "rewards_train/1-w": 3.925494909286499, "rewards_train/2-2": 2.605774402618408, "rewards_train/2-w": 1.944559931755066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.397599101066589, "rewards_train/margins_1": 2.289414405822754, "rewards_train/margins_2": 0.6612144708633423, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -197.71014404296875, "logps_train/policy_1_l": -221.7506866455078, "logps_train/policy_1_w": -192.60714721679688, "logps_train/policy_2_2": -156.12860107421875, "logps_train/policy_2_w": -235.76348876953125, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.1551566123962402, "rewards_train/1-l": -2.068819046020508, "rewards_train/1-w": 3.506474018096924, "rewards_train/2-2": 2.58089017868042, "rewards_train/2-w": 2.2892768383026123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.575293064117432, "rewards_train/margins_1": 2.3513174057006836, "rewards_train/margins_2": 0.2916133403778076, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -182.7451171875, "logps_train/policy_1_l": -147.7449951171875, "logps_train/policy_1_w": -86.4307861328125, "logps_train/policy_2_2": -147.74183654785156, "logps_train/policy_2_w": -110.42845153808594, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": -0.27216836810112, "rewards_train/1-l": -1.3705940246582031, "rewards_train/1-w": 1.5217647552490234, "rewards_train/2-2": 0.5367541313171387, "rewards_train/2-w": 0.8805921673774719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8923587799072266, "rewards_train/margins_1": 1.7939331233501434, "rewards_train/margins_2": -0.34383803606033325, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -119.59483337402344, "logps_train/policy_1_l": -128.76406860351562, "logps_train/policy_1_w": -74.93264770507812, "logps_train/policy_2_2": -106.14977264404297, "logps_train/policy_2_w": -89.3376693725586, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -103.5, "rewards_train/1-2": 1.3795788288116455, "rewards_train/1-l": -1.388516902923584, "rewards_train/1-w": 1.566110372543335, "rewards_train/2-2": 1.8656866550445557, "rewards_train/2-w": 1.4060771465301514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.954627275466919, "rewards_train/margins_1": 0.18653154373168945, "rewards_train/margins_2": 0.4596095085144043, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -96.05492401123047, "logps_train/policy_1_l": -130.04283142089844, "logps_train/policy_1_w": -96.90900421142578, "logps_train/policy_2_2": -79.5149917602539, "logps_train/policy_2_w": -120.46728515625, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.8757579326629639, "rewards_train/1-l": -0.8692243695259094, "rewards_train/1-w": 1.4444512128829956, "rewards_train/2-2": 0.9891257286071777, "rewards_train/2-w": 1.1001461744308472, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.313675582408905, "rewards_train/margins_1": 0.5686932802200317, "rewards_train/margins_2": -0.11102044582366943, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -151.76333618164062, "logps_train/policy_1_l": -158.44699096679688, "logps_train/policy_1_w": -89.2464599609375, "logps_train/policy_2_2": -116.88267517089844, "logps_train/policy_2_w": -130.28067016601562, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2236677408218384, "rewards_train/1-l": -2.5173535346984863, "rewards_train/1-w": 2.7183218002319336, "rewards_train/2-2": 2.421107292175293, "rewards_train/2-w": 2.033651828765869, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.23567533493042, "rewards_train/margins_1": 1.4946540594100952, "rewards_train/margins_2": 0.38745546340942383, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -155.6223907470703, "logps_train/policy_1_l": -108.76568603515625, "logps_train/policy_1_w": -98.92723846435547, "logps_train/policy_2_2": -126.04219055175781, "logps_train/policy_2_w": -125.88424682617188, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.5721356868743896, "rewards_train/1-l": -0.9164126515388489, "rewards_train/1-w": 1.6604015827178955, "rewards_train/2-2": 1.470780372619629, "rewards_train/2-w": 0.9115756750106812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5768142342567444, "rewards_train/margins_1": 1.0882658958435059, "rewards_train/margins_2": 0.5592046976089478, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -236.53341674804688, "logps_train/policy_1_l": -237.50094604492188, "logps_train/policy_1_w": -135.7576904296875, "logps_train/policy_2_2": -197.4955596923828, "logps_train/policy_2_w": -161.88619995117188, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.7904083728790283, "rewards_train/1-l": -1.5696271657943726, "rewards_train/1-w": 2.343761444091797, "rewards_train/2-2": 3.0676307678222656, "rewards_train/2-w": 1.940286636352539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9133886098861694, "rewards_train/margins_1": 0.5533530712127686, "rewards_train/margins_2": 1.1273441314697266, "step": 212 }, { "epoch": 0.64, "logps_train/policy_1_2": -146.0241241455078, "logps_train/policy_1_l": -140.59716796875, "logps_train/policy_1_w": -126.41014862060547, "logps_train/policy_2_2": -110.57034301757812, "logps_train/policy_2_w": -158.1886444091797, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.9038373827934265, "rewards_train/1-l": -1.4358878135681152, "rewards_train/1-w": 2.2347660064697266, "rewards_train/2-2": 2.4539027214050293, "rewards_train/2-w": 1.0592613220214844, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.670653820037842, "rewards_train/margins_1": 1.3309286236763, "rewards_train/margins_2": 1.394641399383545, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -175.45639038085938, "logps_train/policy_1_l": -143.03848266601562, "logps_train/policy_1_w": -149.06838989257812, "logps_train/policy_2_2": -132.60586547851562, "logps_train/policy_2_w": -184.69786071777344, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.1731117963790894, "rewards_train/1-l": -1.069083333015442, "rewards_train/1-w": 2.922067403793335, "rewards_train/2-2": 2.3956637382507324, "rewards_train/2-w": 1.500526785850525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.991150736808777, "rewards_train/margins_1": 1.7489556074142456, "rewards_train/margins_2": 0.8951369524002075, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -134.8548583984375, "logps_train/policy_1_l": -115.4806900024414, "logps_train/policy_1_w": -120.46435546875, "logps_train/policy_2_2": -114.71859741210938, "logps_train/policy_2_w": -143.24984741210938, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.1457645893096924, "rewards_train/1-l": -0.9191138744354248, "rewards_train/1-w": 1.9969244003295898, "rewards_train/2-2": 1.4812653064727783, "rewards_train/2-w": 1.3711092472076416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9160382747650146, "rewards_train/margins_1": 0.8511598110198975, "rewards_train/margins_2": 0.11015605926513672, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -113.86192321777344, "logps_train/policy_1_l": -167.77755737304688, "logps_train/policy_1_w": -102.05714416503906, "logps_train/policy_2_2": -87.001220703125, "logps_train/policy_2_w": -125.44476318359375, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.2044330835342407, "rewards_train/1-l": -1.0427932739257812, "rewards_train/1-w": 2.019285202026367, "rewards_train/2-2": 1.8998775482177734, "rewards_train/2-w": 1.602399230003357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0620784759521484, "rewards_train/margins_1": 0.8148521184921265, "rewards_train/margins_2": 0.2974783182144165, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -152.55125427246094, "logps_train/policy_1_l": -364.12237548828125, "logps_train/policy_1_w": -150.72694396972656, "logps_train/policy_2_2": -115.66120910644531, "logps_train/policy_2_w": -193.81314086914062, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -326.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.0960464477539062, "rewards_train/1-l": -3.778606653213501, "rewards_train/1-w": 2.583556652069092, "rewards_train/2-2": 1.9948166608810425, "rewards_train/2-w": 1.4249354600906372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.362163305282593, "rewards_train/margins_1": 1.4875102043151855, "rewards_train/margins_2": 0.5698812007904053, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -134.0323486328125, "logps_train/policy_1_l": -150.77392578125, "logps_train/policy_1_w": -123.44236755371094, "logps_train/policy_2_2": -113.42900085449219, "logps_train/policy_2_w": -136.30526733398438, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.9014531970024109, "rewards_train/1-l": -0.8341317176818848, "rewards_train/1-w": 1.9240251779556274, "rewards_train/2-2": 1.8729201555252075, "rewards_train/2-w": 1.5487706661224365, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.758156895637512, "rewards_train/margins_1": 1.0225719809532166, "rewards_train/margins_2": 0.324149489402771, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -166.0126495361328, "logps_train/policy_1_l": -164.84652709960938, "logps_train/policy_1_w": -168.8505401611328, "logps_train/policy_2_2": -131.22900390625, "logps_train/policy_2_w": -211.54193115234375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.0596716403961182, "rewards_train/1-l": -1.456918478012085, "rewards_train/1-w": 2.3961963653564453, "rewards_train/2-2": 1.9005364179611206, "rewards_train/2-w": 1.0989320278167725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8531148433685303, "rewards_train/margins_1": 1.3365247249603271, "rewards_train/margins_2": 0.8016043901443481, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -117.64076232910156, "logps_train/policy_1_l": -74.58354187011719, "logps_train/policy_1_w": -63.18933868408203, "logps_train/policy_2_2": -96.89509582519531, "logps_train/policy_2_w": -81.1957015991211, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 0.5874865055084229, "rewards_train/1-l": -1.3307174444198608, "rewards_train/1-w": 1.3060662746429443, "rewards_train/2-2": 1.2911548614501953, "rewards_train/2-w": 0.817148745059967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.636783719062805, "rewards_train/margins_1": 0.7185797691345215, "rewards_train/margins_2": 0.47400611639022827, "step": 213 }, { "epoch": 0.64, "learning_rate": 4.075219608954279e-06, "loss": 0.7867, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -214.98748779296875, "logps_train/policy_1_l": -210.83871459960938, "logps_train/policy_1_w": -182.02349853515625, "logps_train/policy_2_2": -180.8678436279297, "logps_train/policy_2_w": -224.60977172851562, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.603986144065857, "rewards_train/1-l": -2.2272305488586426, "rewards_train/1-w": 3.179682731628418, "rewards_train/2-2": 3.0913407802581787, "rewards_train/2-w": 1.3835537433624268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.4069132804870605, "rewards_train/margins_1": 1.575696587562561, "rewards_train/margins_2": 1.707787036895752, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -158.61825561523438, "logps_train/policy_1_l": -133.90406799316406, "logps_train/policy_1_w": -103.61666870117188, "logps_train/policy_2_2": -140.78440856933594, "logps_train/policy_2_w": -119.49307250976562, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.8202060461044312, "rewards_train/1-l": -1.0082285404205322, "rewards_train/1-w": 1.6343777179718018, "rewards_train/2-2": 1.4340598583221436, "rewards_train/2-w": 1.363485336303711, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.642606258392334, "rewards_train/margins_1": 0.8141716718673706, "rewards_train/margins_2": 0.07057452201843262, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -82.24795532226562, "logps_train/policy_1_l": -70.7193603515625, "logps_train/policy_1_w": -72.64566040039062, "logps_train/policy_2_2": -64.71024322509766, "logps_train/policy_2_w": -85.2628173828125, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.876766562461853, "rewards_train/1-l": -0.3900512456893921, "rewards_train/1-w": 0.6837257146835327, "rewards_train/2-2": 1.377803921699524, "rewards_train/2-w": 0.3119999170303345, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.0737769603729248, "rewards_train/margins_1": -0.1930408477783203, "rewards_train/margins_2": 1.0658040046691895, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -202.73974609375, "logps_train/policy_1_l": -197.48287963867188, "logps_train/policy_1_w": -169.608154296875, "logps_train/policy_2_2": -165.7025909423828, "logps_train/policy_2_w": -215.95578002929688, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.4432125091552734, "rewards_train/1-l": -2.2170376777648926, "rewards_train/1-w": 2.662621259689331, "rewards_train/2-2": 2.6719284057617188, "rewards_train/2-w": 1.4841082096099854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.879658937454224, "rewards_train/margins_1": 1.2194087505340576, "rewards_train/margins_2": 1.1878201961517334, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -203.90719604492188, "logps_train/policy_1_l": -191.6254425048828, "logps_train/policy_1_w": -143.8851318359375, "logps_train/policy_2_2": -162.54052734375, "logps_train/policy_2_w": -187.94009399414062, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.028029441833496, "rewards_train/1-l": -1.7156693935394287, "rewards_train/1-w": 3.3427371978759766, "rewards_train/2-2": 3.333448886871338, "rewards_train/2-w": 1.630990982055664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.058406591415405, "rewards_train/margins_1": 1.3147077560424805, "rewards_train/margins_2": 1.7024579048156738, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -190.56149291992188, "logps_train/policy_1_l": -176.93032836914062, "logps_train/policy_1_w": -192.52691650390625, "logps_train/policy_2_2": -153.35653686523438, "logps_train/policy_2_w": -255.00439453125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.306349277496338, "rewards_train/1-l": -1.631727695465088, "rewards_train/1-w": 3.119182586669922, "rewards_train/2-2": 3.361220598220825, "rewards_train/2-w": 1.4495600461959839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.75091028213501, "rewards_train/margins_1": 0.812833309173584, "rewards_train/margins_2": 1.9116605520248413, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -126.07270812988281, "logps_train/policy_1_l": -159.64694213867188, "logps_train/policy_1_w": -143.9427490234375, "logps_train/policy_2_2": -101.34590911865234, "logps_train/policy_2_w": -171.29061889648438, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.2380423545837402, "rewards_train/1-l": -1.4851045608520508, "rewards_train/1-w": 2.6354126930236816, "rewards_train/2-2": 1.9318153858184814, "rewards_train/2-w": 1.8006248474121094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.120517253875732, "rewards_train/margins_1": 1.3973703384399414, "rewards_train/margins_2": 0.13119053840637207, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -161.55715942382812, "logps_train/policy_1_l": -114.37843322753906, "logps_train/policy_1_w": -166.60809326171875, "logps_train/policy_2_2": -134.58676147460938, "logps_train/policy_2_w": -193.73822021484375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.188035249710083, "rewards_train/1-l": -0.8278825283050537, "rewards_train/1-w": 2.59544038772583, "rewards_train/2-2": 2.194448947906494, "rewards_train/2-w": 1.713677167892456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.423322916030884, "rewards_train/margins_1": 1.407405138015747, "rewards_train/margins_2": 0.4807717800140381, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -162.457763671875, "logps_train/policy_1_l": -186.61724853515625, "logps_train/policy_1_w": -132.5800018310547, "logps_train/policy_2_2": -129.948974609375, "logps_train/policy_2_w": -165.76303100585938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.514380693435669, "rewards_train/1-l": -2.2658259868621826, "rewards_train/1-w": 2.6701250076293945, "rewards_train/2-2": 2.3519790172576904, "rewards_train/2-w": 2.0111966133117676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.935950994491577, "rewards_train/margins_1": 1.1557443141937256, "rewards_train/margins_2": 0.34078240394592285, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -214.6181182861328, "logps_train/policy_1_l": -163.61944580078125, "logps_train/policy_1_w": -145.19744873046875, "logps_train/policy_2_2": -183.49136352539062, "logps_train/policy_2_w": -173.10601806640625, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.023343563079834, "rewards_train/1-l": -1.1627261638641357, "rewards_train/1-w": 2.038849353790283, "rewards_train/2-2": 2.039144992828369, "rewards_train/2-w": 1.2018980979919434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.201575517654419, "rewards_train/margins_1": 1.0155057907104492, "rewards_train/margins_2": 0.8372468948364258, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -164.5179443359375, "logps_train/policy_1_l": -161.66151428222656, "logps_train/policy_1_w": -96.41180419921875, "logps_train/policy_2_2": -132.00314331054688, "logps_train/policy_2_w": -134.55430603027344, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.0489872694015503, "rewards_train/1-l": -1.915077567100525, "rewards_train/1-w": 2.172687530517578, "rewards_train/2-2": 2.1176536083221436, "rewards_train/2-w": 1.1703498363494873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.087765097618103, "rewards_train/margins_1": 1.1237002611160278, "rewards_train/margins_2": 0.9473037719726562, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -145.16787719726562, "logps_train/policy_1_l": -134.38815307617188, "logps_train/policy_1_w": -124.99482727050781, "logps_train/policy_2_2": -116.85225677490234, "logps_train/policy_2_w": -155.2412109375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.4003989696502686, "rewards_train/1-l": -1.4743626117706299, "rewards_train/1-w": 2.127080202102661, "rewards_train/2-2": 2.274930000305176, "rewards_train/2-w": 1.3782234191894531, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.601442813873291, "rewards_train/margins_1": 0.7266812324523926, "rewards_train/margins_2": 0.8967065811157227, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -181.4497528076172, "logps_train/policy_1_l": -270.7052917480469, "logps_train/policy_1_w": -141.04415893554688, "logps_train/policy_2_2": -131.62789916992188, "logps_train/policy_2_w": -190.51467895507812, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.605024814605713, "rewards_train/1-l": -2.4220938682556152, "rewards_train/1-w": 2.5151162147521973, "rewards_train/2-2": 2.3090853691101074, "rewards_train/2-w": 0.9610328674316406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9372100830078125, "rewards_train/margins_1": 0.9100914001464844, "rewards_train/margins_2": 1.3480525016784668, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -98.12303924560547, "logps_train/policy_1_l": -83.30429077148438, "logps_train/policy_1_w": -62.28309631347656, "logps_train/policy_2_2": -75.70638275146484, "logps_train/policy_2_w": -77.0833511352539, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.7699228525161743, "rewards_train/1-l": -0.803476095199585, "rewards_train/1-w": 1.233018159866333, "rewards_train/2-2": 1.326138973236084, "rewards_train/2-w": 1.0604145526885986, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.036494255065918, "rewards_train/margins_1": 0.4630953073501587, "rewards_train/margins_2": 0.26572442054748535, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -195.12271118164062, "logps_train/policy_1_l": -177.01412963867188, "logps_train/policy_1_w": -112.99797058105469, "logps_train/policy_2_2": -168.99478149414062, "logps_train/policy_2_w": -134.62600708007812, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.4846036434173584, "rewards_train/1-l": -1.9619603157043457, "rewards_train/1-w": 2.3841872215270996, "rewards_train/2-2": 1.9598954916000366, "rewards_train/2-w": 1.8848609924316406, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.346147537231445, "rewards_train/margins_1": 0.8995835781097412, "rewards_train/margins_2": 0.075034499168396, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -153.1189727783203, "logps_train/policy_1_l": -164.48431396484375, "logps_train/policy_1_w": -159.82357788085938, "logps_train/policy_2_2": -120.05706024169922, "logps_train/policy_2_w": -199.641357421875, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.4115405082702637, "rewards_train/1-l": -1.1421812772750854, "rewards_train/1-w": 2.620767831802368, "rewards_train/2-2": 2.455231189727783, "rewards_train/2-w": 1.129613995552063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7629491090774536, "rewards_train/margins_1": 1.2092273235321045, "rewards_train/margins_2": 1.3256171941757202, "step": 215 }, { "epoch": 0.65, "learning_rate": 4.05596442217179e-06, "loss": 0.6738, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -154.7963409423828, "logps_train/policy_1_l": -149.92510986328125, "logps_train/policy_1_w": -91.49282836914062, "logps_train/policy_2_2": -118.57654571533203, "logps_train/policy_2_w": -113.77705383300781, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.7516162395477295, "rewards_train/1-l": -0.5120428204536438, "rewards_train/1-w": 1.9163421392440796, "rewards_train/2-2": 2.575157642364502, "rewards_train/2-w": 1.2183884382247925, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4283849596977234, "rewards_train/margins_1": 0.1647258996963501, "rewards_train/margins_2": 1.3567692041397095, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -166.90658569335938, "logps_train/policy_1_l": -178.80923461914062, "logps_train/policy_1_w": -119.93179321289062, "logps_train/policy_2_2": -137.11782836914062, "logps_train/policy_2_w": -151.82431030273438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.5843403339385986, "rewards_train/1-l": -1.4020156860351562, "rewards_train/1-w": 2.2034997940063477, "rewards_train/2-2": 2.2350926399230957, "rewards_train/2-w": 1.1804606914520264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.605515480041504, "rewards_train/margins_1": 0.619159460067749, "rewards_train/margins_2": 1.0546319484710693, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -198.2981414794922, "logps_train/policy_1_l": -144.16587829589844, "logps_train/policy_1_w": -169.5169219970703, "logps_train/policy_2_2": -155.80157470703125, "logps_train/policy_2_w": -215.1227264404297, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 0.23542070388793945, "rewards_train/1-l": -2.322350263595581, "rewards_train/1-w": 1.9959636926651, "rewards_train/2-2": 1.6319513320922852, "rewards_train/2-w": 0.7814780473709106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.318313956260681, "rewards_train/margins_1": 1.7605429887771606, "rewards_train/margins_2": 0.8504732847213745, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -160.68235778808594, "logps_train/policy_1_l": -124.66424560546875, "logps_train/policy_1_w": -102.00614929199219, "logps_train/policy_2_2": -123.95442199707031, "logps_train/policy_2_w": -126.52971649169922, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.6411395072937012, "rewards_train/1-l": -1.4160337448120117, "rewards_train/1-w": 2.0821971893310547, "rewards_train/2-2": 2.0584635734558105, "rewards_train/2-w": 1.3542542457580566, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4982309341430664, "rewards_train/margins_1": 1.4410576820373535, "rewards_train/margins_2": 0.7042093276977539, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -113.76448059082031, "logps_train/policy_1_l": -135.69735717773438, "logps_train/policy_1_w": -72.19738006591797, "logps_train/policy_2_2": -94.98750305175781, "logps_train/policy_2_w": -88.45570373535156, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.4610521793365479, "rewards_train/1-l": -2.22286057472229, "rewards_train/1-w": 1.7755744457244873, "rewards_train/2-2": 1.913750410079956, "rewards_train/2-w": 1.3403674364089966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9984350204467773, "rewards_train/margins_1": 0.31452226638793945, "rewards_train/margins_2": 0.5733829736709595, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -108.97822570800781, "logps_train/policy_1_l": -130.54161071777344, "logps_train/policy_1_w": -80.48066711425781, "logps_train/policy_2_2": -93.25618743896484, "logps_train/policy_2_w": -88.6993408203125, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 1.4162406921386719, "rewards_train/1-l": -0.5955673456192017, "rewards_train/1-w": 0.7152144908905029, "rewards_train/2-2": 1.7142248153686523, "rewards_train/2-w": 0.5613157153129578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3107818365097046, "rewards_train/margins_1": -0.701026201248169, "rewards_train/margins_2": 1.1529091000556946, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -139.11659240722656, "logps_train/policy_1_l": -128.59056091308594, "logps_train/policy_1_w": -113.19951629638672, "logps_train/policy_2_2": -107.66336059570312, "logps_train/policy_2_w": -141.81300354003906, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.6539661884307861, "rewards_train/1-l": -1.477805733680725, "rewards_train/1-w": 1.6976265907287598, "rewards_train/2-2": 1.3571019172668457, "rewards_train/2-w": 0.6093246340751648, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.175432324409485, "rewards_train/margins_1": 1.0436604022979736, "rewards_train/margins_2": 0.7477772831916809, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -201.152099609375, "logps_train/policy_1_l": -172.0582275390625, "logps_train/policy_1_w": -141.95864868164062, "logps_train/policy_2_2": -159.8940887451172, "logps_train/policy_2_w": -196.0819091796875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 0.7539310455322266, "rewards_train/1-l": -1.3042593002319336, "rewards_train/1-w": 2.911947727203369, "rewards_train/2-2": 1.7156696319580078, "rewards_train/2-w": 0.9177848100662231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.216207027435303, "rewards_train/margins_1": 2.1580166816711426, "rewards_train/margins_2": 0.7978848218917847, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -163.8013153076172, "logps_train/policy_1_l": -128.71530151367188, "logps_train/policy_1_w": -91.00840759277344, "logps_train/policy_2_2": -131.47703552246094, "logps_train/policy_2_w": -115.70059204101562, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.647992730140686, "rewards_train/1-l": -0.8449680209159851, "rewards_train/1-w": 2.5288467407226562, "rewards_train/2-2": 2.6460466384887695, "rewards_train/2-w": 1.9174407720565796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3738147616386414, "rewards_train/margins_1": 0.8808540105819702, "rewards_train/margins_2": 0.7286058664321899, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -230.54705810546875, "logps_train/policy_1_l": -202.706298828125, "logps_train/policy_1_w": -163.81130981445312, "logps_train/policy_2_2": -199.50225830078125, "logps_train/policy_2_w": -198.14382934570312, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.395294427871704, "rewards_train/1-l": -2.159496784210205, "rewards_train/1-w": 2.629415988922119, "rewards_train/2-2": 2.7232108116149902, "rewards_train/2-w": 1.6778056621551514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.788912773132324, "rewards_train/margins_1": 1.234121561050415, "rewards_train/margins_2": 1.0454051494598389, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -149.77891540527344, "logps_train/policy_1_l": -133.1887664794922, "logps_train/policy_1_w": -71.4427261352539, "logps_train/policy_2_2": -125.14958953857422, "logps_train/policy_2_w": -94.12413024902344, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 1.2158584594726562, "rewards_train/1-l": -1.7907516956329346, "rewards_train/1-w": 0.985414981842041, "rewards_train/2-2": 1.7689272165298462, "rewards_train/2-w": 0.6239153146743774, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7761666774749756, "rewards_train/margins_1": -0.23044347763061523, "rewards_train/margins_2": 1.1450119018554688, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -202.4039306640625, "logps_train/policy_1_l": -209.52459716796875, "logps_train/policy_1_w": -158.32843017578125, "logps_train/policy_2_2": -174.19741821289062, "logps_train/policy_2_w": -187.14437866210938, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.8627313375473022, "rewards_train/1-l": -2.3555846214294434, "rewards_train/1-w": 2.493720531463623, "rewards_train/2-2": 2.8333821296691895, "rewards_train/2-w": 1.6855626106262207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.849305152893066, "rewards_train/margins_1": 0.6309891939163208, "rewards_train/margins_2": 1.1478195190429688, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -186.12881469726562, "logps_train/policy_1_l": -140.93624877929688, "logps_train/policy_1_w": -74.37972259521484, "logps_train/policy_2_2": -135.7738494873047, "logps_train/policy_2_w": -100.86833190917969, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 1.1152427196502686, "rewards_train/1-l": -1.80299973487854, "rewards_train/1-w": 1.7042152881622314, "rewards_train/2-2": 2.4226150512695312, "rewards_train/2-w": 1.1319167613983154, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5072150230407715, "rewards_train/margins_1": 0.5889725685119629, "rewards_train/margins_2": 1.2906982898712158, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -80.89857482910156, "logps_train/policy_1_l": -91.5201644897461, "logps_train/policy_1_w": -108.77835083007812, "logps_train/policy_2_2": -69.17060852050781, "logps_train/policy_2_w": -139.82196044921875, "logps_train/ref_1_2": -88.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7089704275131226, "rewards_train/1-l": -1.4481104612350464, "rewards_train/1-w": 1.8682583570480347, "rewards_train/2-2": 0.9540332555770874, "rewards_train/2-w": 1.2459299564361572, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.316368818283081, "rewards_train/margins_1": 1.159287929534912, "rewards_train/margins_2": -0.2918967008590698, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -134.22879028320312, "logps_train/policy_1_l": -64.72640991210938, "logps_train/policy_1_w": -90.92359924316406, "logps_train/policy_2_2": -118.72677612304688, "logps_train/policy_2_w": -116.80845642089844, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.106808066368103, "rewards_train/1-l": -0.3951503038406372, "rewards_train/1-w": 1.6384990215301514, "rewards_train/2-2": 1.439822793006897, "rewards_train/2-w": 0.9613412618637085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.0336493253707886, "rewards_train/margins_1": 0.5316909551620483, "rewards_train/margins_2": 0.4784815311431885, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -114.96043395996094, "logps_train/policy_1_l": -198.44235229492188, "logps_train/policy_1_w": -105.20928955078125, "logps_train/policy_2_2": -90.94905090332031, "logps_train/policy_2_w": -120.0434799194336, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.5367687940597534, "rewards_train/1-l": -2.3415000438690186, "rewards_train/1-w": 0.9379085898399353, "rewards_train/2-2": 2.1050944328308105, "rewards_train/2-w": 0.24828879535198212, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.279408633708954, "rewards_train/margins_1": -0.5988602042198181, "rewards_train/margins_2": 1.8568056374788284, "step": 217 }, { "epoch": 0.65, "learning_rate": 4.036557376004694e-06, "loss": 0.8615, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -101.89903259277344, "logps_train/policy_1_l": -122.86550903320312, "logps_train/policy_1_w": -93.6323471069336, "logps_train/policy_2_2": -76.90706634521484, "logps_train/policy_2_w": -126.41352844238281, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.48245999217033386, "rewards_train/1-l": -1.0252227783203125, "rewards_train/1-w": 1.0406720638275146, "rewards_train/2-2": 1.134488582611084, "rewards_train/2-w": 0.27739715576171875, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.065894842147827, "rewards_train/margins_1": 0.5582120716571808, "rewards_train/margins_2": 0.8570914268493652, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -136.3690643310547, "logps_train/policy_1_l": -139.10763549804688, "logps_train/policy_1_w": -119.18142700195312, "logps_train/policy_2_2": -108.62275695800781, "logps_train/policy_2_w": -147.8846435546875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.774031162261963, "rewards_train/1-l": -1.424826741218567, "rewards_train/1-w": 2.100606918334961, "rewards_train/2-2": 2.447099208831787, "rewards_train/2-w": 1.4326294660568237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.525433659553528, "rewards_train/margins_1": 0.32657575607299805, "rewards_train/margins_2": 1.0144697427749634, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -145.67030334472656, "logps_train/policy_1_l": -114.22994995117188, "logps_train/policy_1_w": -121.13253784179688, "logps_train/policy_2_2": -116.38954162597656, "logps_train/policy_2_w": -161.73854064941406, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.9439071416854858, "rewards_train/1-l": -0.9593226313591003, "rewards_train/1-w": 2.4047152996063232, "rewards_train/2-2": 2.765733242034912, "rewards_train/2-w": 1.0991929769515991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3640379309654236, "rewards_train/margins_1": 0.4608081579208374, "rewards_train/margins_2": 1.666540265083313, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -214.34527587890625, "logps_train/policy_1_l": -219.7606201171875, "logps_train/policy_1_w": -171.71275329589844, "logps_train/policy_2_2": -188.79698181152344, "logps_train/policy_2_w": -198.6884002685547, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8295351266860962, "rewards_train/1-l": -2.5121970176696777, "rewards_train/1-w": 2.4193499088287354, "rewards_train/2-2": 2.6515517234802246, "rewards_train/2-w": 1.763972282409668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.931546926498413, "rewards_train/margins_1": 0.5898147821426392, "rewards_train/margins_2": 0.8875794410705566, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -182.48690795898438, "logps_train/policy_1_l": -191.85736083984375, "logps_train/policy_1_w": -143.283935546875, "logps_train/policy_2_2": -139.5946502685547, "logps_train/policy_2_w": -183.74826049804688, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.2227935492992401, "rewards_train/1-l": -1.9355404376983643, "rewards_train/1-w": 2.3396730422973633, "rewards_train/2-2": 2.1780357360839844, "rewards_train/2-w": 1.286306381225586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2752134799957275, "rewards_train/margins_1": 2.116879492998123, "rewards_train/margins_2": 0.8917293548583984, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -123.76814270019531, "logps_train/policy_1_l": -105.01895141601562, "logps_train/policy_1_w": -87.33899688720703, "logps_train/policy_2_2": -100.83714294433594, "logps_train/policy_2_w": -115.21952056884766, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.30666613578796387, "rewards_train/1-l": -1.3667383193969727, "rewards_train/1-w": 1.3879752159118652, "rewards_train/2-2": 0.9789084792137146, "rewards_train/2-w": 1.0217978954315186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.754713535308838, "rewards_train/margins_1": 1.0813090801239014, "rewards_train/margins_2": -0.042889416217803955, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -119.85157012939453, "logps_train/policy_1_l": -236.49183654785156, "logps_train/policy_1_w": -120.11529541015625, "logps_train/policy_2_2": -101.8673095703125, "logps_train/policy_2_w": -142.97140502929688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8578121662139893, "rewards_train/1-l": -3.2474260330200195, "rewards_train/1-w": 1.4785089492797852, "rewards_train/2-2": 2.2788944244384766, "rewards_train/2-w": 1.0637969970703125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.725934982299805, "rewards_train/margins_1": -0.3793032169342041, "rewards_train/margins_2": 1.215097427368164, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -165.46484375, "logps_train/policy_1_l": -209.54867553710938, "logps_train/policy_1_w": -148.2201385498047, "logps_train/policy_2_2": -135.39198303222656, "logps_train/policy_2_w": -186.4582061767578, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.9144521951675415, "rewards_train/1-l": -2.3006677627563477, "rewards_train/1-w": 2.5647051334381104, "rewards_train/2-2": 2.2670516967773438, "rewards_train/2-w": 1.4229291677474976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.865372896194458, "rewards_train/margins_1": 1.6502529382705688, "rewards_train/margins_2": 0.8441225290298462, "step": 218 }, { "epoch": 0.66, "logps_train/policy_1_2": -132.7526092529297, "logps_train/policy_1_l": -58.587066650390625, "logps_train/policy_1_w": -40.88406753540039, "logps_train/policy_2_2": -106.77105712890625, "logps_train/policy_2_w": -58.704795837402344, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -48.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -64.0, "rewards_train/1-2": 0.6815754175186157, "rewards_train/1-l": -0.5979645848274231, "rewards_train/1-w": 0.7061243057250977, "rewards_train/2-2": 1.4482848644256592, "rewards_train/2-w": 0.5638956427574158, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.3040888905525208, "rewards_train/margins_1": 0.024548888206481934, "rewards_train/margins_2": 0.8843892216682434, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -174.8319549560547, "logps_train/policy_1_l": -198.95697021484375, "logps_train/policy_1_w": -119.86077117919922, "logps_train/policy_2_2": -138.48077392578125, "logps_train/policy_2_w": -164.88516235351562, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.8230540752410889, "rewards_train/1-l": -1.8460884094238281, "rewards_train/1-w": 1.9627509117126465, "rewards_train/2-2": 2.6034865379333496, "rewards_train/2-w": 1.072421669960022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8088393211364746, "rewards_train/margins_1": 0.13969683647155762, "rewards_train/margins_2": 1.5310648679733276, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -158.21612548828125, "logps_train/policy_1_l": -153.69390869140625, "logps_train/policy_1_w": -131.70144653320312, "logps_train/policy_2_2": -124.59062194824219, "logps_train/policy_2_w": -167.70004272460938, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.918231725692749, "rewards_train/1-l": -2.2570858001708984, "rewards_train/1-w": 2.7814176082611084, "rewards_train/2-2": 2.0741403102874756, "rewards_train/2-w": 1.5909337997436523, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.038503408432007, "rewards_train/margins_1": 1.8631858825683594, "rewards_train/margins_2": 0.48320651054382324, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -136.16624450683594, "logps_train/policy_1_l": -119.28555297851562, "logps_train/policy_1_w": -138.41073608398438, "logps_train/policy_2_2": -109.31824493408203, "logps_train/policy_2_w": -180.00537109375, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.7197040915489197, "rewards_train/1-l": -1.1035550832748413, "rewards_train/1-w": 2.313613176345825, "rewards_train/2-2": 1.6283316612243652, "rewards_train/2-w": 0.8572750091552734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4171682596206665, "rewards_train/margins_1": 1.5939090847969055, "rewards_train/margins_2": 0.7710566520690918, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -209.66195678710938, "logps_train/policy_1_l": -195.750244140625, "logps_train/policy_1_w": -141.5804443359375, "logps_train/policy_2_2": -176.73211669921875, "logps_train/policy_2_w": -168.67849731445312, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.6779454350471497, "rewards_train/1-l": -2.4156494140625, "rewards_train/1-w": 2.5642213821411133, "rewards_train/2-2": 2.0735650062561035, "rewards_train/2-w": 1.7133033275604248, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.979870796203613, "rewards_train/margins_1": 1.8862759470939636, "rewards_train/margins_2": 0.3602616786956787, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -163.32806396484375, "logps_train/policy_1_l": -210.37460327148438, "logps_train/policy_1_w": -145.80770874023438, "logps_train/policy_2_2": -126.89582824707031, "logps_train/policy_2_w": -185.86212158203125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2185598611831665, "rewards_train/1-l": -2.477499485015869, "rewards_train/1-w": 3.0348551273345947, "rewards_train/2-2": 1.9953289031982422, "rewards_train/2-w": 1.5794122219085693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.512354612350464, "rewards_train/margins_1": 1.8162952661514282, "rewards_train/margins_2": 0.41591668128967285, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -111.03152465820312, "logps_train/policy_1_l": -114.44670104980469, "logps_train/policy_1_w": -114.33836364746094, "logps_train/policy_2_2": -86.51492309570312, "logps_train/policy_2_w": -145.3902587890625, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0921599864959717, "rewards_train/1-l": -1.7110273838043213, "rewards_train/1-w": 2.301710605621338, "rewards_train/2-2": 1.6188199520111084, "rewards_train/2-w": 1.245349407196045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.012737989425659, "rewards_train/margins_1": 1.2095506191253662, "rewards_train/margins_2": 0.3734705448150635, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -177.65408325195312, "logps_train/policy_1_l": -125.30905151367188, "logps_train/policy_1_w": -142.05572509765625, "logps_train/policy_2_2": -145.71212768554688, "logps_train/policy_2_w": -168.5413818359375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.86427903175354, "rewards_train/1-l": -1.3736780881881714, "rewards_train/1-w": 1.7998969554901123, "rewards_train/2-2": 2.7662875652313232, "rewards_train/2-w": 0.9552363753318787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1735750436782837, "rewards_train/margins_1": -0.06438207626342773, "rewards_train/margins_2": 1.8110511898994446, "step": 219 }, { "epoch": 0.66, "learning_rate": 4.0170003645464835e-06, "loss": 0.7671, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -82.19890594482422, "logps_train/policy_1_l": -61.66587448120117, "logps_train/policy_1_w": -41.99485778808594, "logps_train/policy_2_2": -66.29547119140625, "logps_train/policy_2_w": -57.42498016357422, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -54.5, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -65.5, "rewards_train/1-2": 0.5248846411705017, "rewards_train/1-l": -0.9696145057678223, "rewards_train/1-w": 1.2552015781402588, "rewards_train/2-2": 1.1563297510147095, "rewards_train/2-w": 0.8057445287704468, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.224816083908081, "rewards_train/margins_1": 0.7303169369697571, "rewards_train/margins_2": 0.3505852222442627, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -187.40325927734375, "logps_train/policy_1_l": -152.10891723632812, "logps_train/policy_1_w": -127.38314056396484, "logps_train/policy_2_2": -150.45396423339844, "logps_train/policy_2_w": -165.02035522460938, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.5127999782562256, "rewards_train/1-l": -1.8136249780654907, "rewards_train/1-w": 2.3054358959198, "rewards_train/2-2": 2.462611675262451, "rewards_train/2-w": 1.1854639053344727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.1190608739852905, "rewards_train/margins_1": 0.7926359176635742, "rewards_train/margins_2": 1.2771477699279785, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -142.2821044921875, "logps_train/policy_1_l": -177.09410095214844, "logps_train/policy_1_w": -80.58959197998047, "logps_train/policy_2_2": -120.1815414428711, "logps_train/policy_2_w": -105.03348541259766, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.270227313041687, "rewards_train/1-l": -1.8949579000473022, "rewards_train/1-w": 1.8328380584716797, "rewards_train/2-2": 1.897861361503601, "rewards_train/2-w": 1.4935264587402344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.727795958518982, "rewards_train/margins_1": 0.5626107454299927, "rewards_train/margins_2": 0.4043349027633667, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -150.14382934570312, "logps_train/policy_1_l": -148.05052185058594, "logps_train/policy_1_w": -98.52792358398438, "logps_train/policy_2_2": -123.11878967285156, "logps_train/policy_2_w": -134.13674926757812, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.0613980293273926, "rewards_train/1-l": -1.8775129318237305, "rewards_train/1-w": 2.77533221244812, "rewards_train/2-2": 1.9834328889846802, "rewards_train/2-w": 1.5191384553909302, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.652845144271851, "rewards_train/margins_1": 1.7139341831207275, "rewards_train/margins_2": 0.46429443359375, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -154.36260986328125, "logps_train/policy_1_l": -156.31764221191406, "logps_train/policy_1_w": -158.64013671875, "logps_train/policy_2_2": -129.1887969970703, "logps_train/policy_2_w": -192.1660614013672, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8418629169464111, "rewards_train/1-l": -1.2313737869262695, "rewards_train/1-w": 2.869579315185547, "rewards_train/2-2": 2.754167318344116, "rewards_train/2-w": 1.585737705230713, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.100953102111816, "rewards_train/margins_1": 1.0277163982391357, "rewards_train/margins_2": 1.1684296131134033, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -126.34326934814453, "logps_train/policy_1_l": -76.63934326171875, "logps_train/policy_1_w": -101.47895812988281, "logps_train/policy_2_2": -93.8353271484375, "logps_train/policy_2_w": -133.78367614746094, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.9781736731529236, "rewards_train/1-l": -0.9154967069625854, "rewards_train/1-w": 2.183354616165161, "rewards_train/2-2": 1.874279499053955, "rewards_train/2-w": 1.1591325998306274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0988513231277466, "rewards_train/margins_1": 1.2051809430122375, "rewards_train/margins_2": 0.7151468992233276, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -171.27847290039062, "logps_train/policy_1_l": -172.8917999267578, "logps_train/policy_1_w": -136.85223388671875, "logps_train/policy_2_2": -144.69818115234375, "logps_train/policy_2_w": -167.0944366455078, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.7815265655517578, "rewards_train/1-l": -1.3540244102478027, "rewards_train/1-w": 3.0257153511047363, "rewards_train/2-2": 2.652056932449341, "rewards_train/2-w": 2.206181526184082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.379739761352539, "rewards_train/margins_1": 1.2441887855529785, "rewards_train/margins_2": 0.4458754062652588, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -145.2027130126953, "logps_train/policy_1_l": -209.8771209716797, "logps_train/policy_1_w": -183.8225555419922, "logps_train/policy_2_2": -113.05081939697266, "logps_train/policy_2_w": -233.31182861328125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.107853651046753, "rewards_train/1-l": -2.3990395069122314, "rewards_train/1-w": 2.6657910346984863, "rewards_train/2-2": 1.7824174165725708, "rewards_train/2-w": 1.0516278743743896, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.064830541610718, "rewards_train/margins_1": 1.5579373836517334, "rewards_train/margins_2": 0.7307895421981812, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -115.27933502197266, "logps_train/policy_1_l": -160.67453002929688, "logps_train/policy_1_w": -136.8870849609375, "logps_train/policy_2_2": -91.06387329101562, "logps_train/policy_2_w": -187.59921264648438, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.1286094188690186, "rewards_train/1-l": -1.24391770362854, "rewards_train/1-w": 2.1331653594970703, "rewards_train/2-2": 1.8187098503112793, "rewards_train/2-w": 1.393204689025879, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3770830631256104, "rewards_train/margins_1": 1.0045559406280518, "rewards_train/margins_2": 0.4255051612854004, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -163.08938598632812, "logps_train/policy_1_l": -160.32281494140625, "logps_train/policy_1_w": -108.38137817382812, "logps_train/policy_2_2": -135.96441650390625, "logps_train/policy_2_w": -130.65196228027344, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.8566862344741821, "rewards_train/1-l": -1.3927301168441772, "rewards_train/1-w": 1.920455813407898, "rewards_train/2-2": 2.26762056350708, "rewards_train/2-w": 1.25902259349823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.313185930252075, "rewards_train/margins_1": 0.06376957893371582, "rewards_train/margins_2": 1.00859797000885, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -162.2019805908203, "logps_train/policy_1_l": -191.54364013671875, "logps_train/policy_1_w": -127.73675537109375, "logps_train/policy_2_2": -132.20376586914062, "logps_train/policy_2_w": -163.27163696289062, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.3516767024993896, "rewards_train/1-l": -0.9539735317230225, "rewards_train/1-w": 2.0146055221557617, "rewards_train/2-2": 2.3304057121276855, "rewards_train/2-w": 1.2415857315063477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.968579053878784, "rewards_train/margins_1": 0.6629288196563721, "rewards_train/margins_2": 1.088819980621338, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -245.02279663085938, "logps_train/policy_1_l": -202.02108764648438, "logps_train/policy_1_w": -194.70309448242188, "logps_train/policy_2_2": -219.16610717773438, "logps_train/policy_2_w": -226.5758056640625, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.9852209091186523, "rewards_train/1-l": -1.433359146118164, "rewards_train/1-w": 2.454691171646118, "rewards_train/2-2": 2.727137565612793, "rewards_train/2-w": 1.4892933368682861, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8880503177642822, "rewards_train/margins_1": 0.4694702625274658, "rewards_train/margins_2": 1.2378442287445068, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -148.94451904296875, "logps_train/policy_1_l": -79.0900650024414, "logps_train/policy_1_w": -102.91944885253906, "logps_train/policy_2_2": -115.6132583618164, "logps_train/policy_2_w": -126.46309661865234, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.9352352023124695, "rewards_train/1-l": -0.9148657321929932, "rewards_train/1-w": 1.2416479587554932, "rewards_train/2-2": 2.1675801277160645, "rewards_train/2-w": 0.4560343325138092, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1565136909484863, "rewards_train/margins_1": 0.3064127564430237, "rewards_train/margins_2": 1.7115457952022552, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -117.39041900634766, "logps_train/policy_1_l": -96.00646209716797, "logps_train/policy_1_w": -106.22232055664062, "logps_train/policy_2_2": -95.6978759765625, "logps_train/policy_2_w": -126.00444030761719, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.9304900169372559, "rewards_train/1-l": -0.7600208520889282, "rewards_train/1-w": 2.276987075805664, "rewards_train/2-2": 1.4380249977111816, "rewards_train/2-w": 1.662837266921997, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.0370079278945923, "rewards_train/margins_1": 1.3464970588684082, "rewards_train/margins_2": -0.22481226921081543, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -123.55278778076172, "logps_train/policy_1_l": -141.2139892578125, "logps_train/policy_1_w": -86.76373291015625, "logps_train/policy_2_2": -99.17713165283203, "logps_train/policy_2_w": -116.20507049560547, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.0802680253982544, "rewards_train/1-l": -1.9213016033172607, "rewards_train/1-w": 2.244720220565796, "rewards_train/2-2": 1.8599236011505127, "rewards_train/2-w": 1.4107433557510376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.166021823883057, "rewards_train/margins_1": 1.1644521951675415, "rewards_train/margins_2": 0.4491802453994751, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -202.0738525390625, "logps_train/policy_1_l": -251.33831787109375, "logps_train/policy_1_w": -156.46347045898438, "logps_train/policy_2_2": -161.55221557617188, "logps_train/policy_2_w": -215.02777099609375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.4113649129867554, "rewards_train/1-l": -2.552582263946533, "rewards_train/1-w": 2.4911537170410156, "rewards_train/2-2": 2.901028871536255, "rewards_train/2-w": 0.6722223162651062, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.043735980987549, "rewards_train/margins_1": 1.0797888040542603, "rewards_train/margins_2": 2.2288065552711487, "step": 221 }, { "epoch": 0.66, "learning_rate": 3.997295296527001e-06, "loss": 0.7538, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -125.2237319946289, "logps_train/policy_1_l": -107.84510803222656, "logps_train/policy_1_w": -87.77474212646484, "logps_train/policy_2_2": -99.65711212158203, "logps_train/policy_2_w": -111.93260192871094, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 0.884268045425415, "rewards_train/1-l": -1.3052139282226562, "rewards_train/1-w": 1.123306393623352, "rewards_train/2-2": 1.8766716718673706, "rewards_train/2-w": 0.6340836882591248, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4285203218460083, "rewards_train/margins_1": 0.239038348197937, "rewards_train/margins_2": 1.2425879836082458, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -83.02195739746094, "logps_train/policy_1_l": -120.91958618164062, "logps_train/policy_1_w": -99.71626281738281, "logps_train/policy_2_2": -63.976524353027344, "logps_train/policy_2_w": -129.3171844482422, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.45249143242836, "rewards_train/1-l": -1.3807649612426758, "rewards_train/1-w": 1.4730031490325928, "rewards_train/2-2": 0.9912147521972656, "rewards_train/2-w": 0.4280467927455902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8537681102752686, "rewards_train/margins_1": 1.0205117166042328, "rewards_train/margins_2": 0.5631679594516754, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -137.72068786621094, "logps_train/policy_1_l": -192.16860961914062, "logps_train/policy_1_w": -143.002197265625, "logps_train/policy_2_2": -112.64159393310547, "logps_train/policy_2_w": -176.00140380859375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.849806308746338, "rewards_train/1-l": -2.0455727577209473, "rewards_train/1-w": 2.2417736053466797, "rewards_train/2-2": 2.0568370819091797, "rewards_train/2-w": 1.244000792503357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.287346363067627, "rewards_train/margins_1": 0.3919672966003418, "rewards_train/margins_2": 0.8128362894058228, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -132.0204620361328, "logps_train/policy_1_l": -96.95297241210938, "logps_train/policy_1_w": -109.35662841796875, "logps_train/policy_2_2": -91.33464813232422, "logps_train/policy_2_w": -145.40863037109375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.6557660698890686, "rewards_train/1-l": -0.9546723365783691, "rewards_train/1-w": 1.0401191711425781, "rewards_train/2-2": 1.7157537937164307, "rewards_train/2-w": 0.07476136088371277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9947915077209473, "rewards_train/margins_1": 0.3843531012535095, "rewards_train/margins_2": 1.640992432832718, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -138.992919921875, "logps_train/policy_1_l": -129.50418090820312, "logps_train/policy_1_w": -115.96641540527344, "logps_train/policy_2_2": -109.57534790039062, "logps_train/policy_2_w": -155.89810180664062, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.6288325786590576, "rewards_train/1-l": -1.0647733211517334, "rewards_train/1-w": 2.3088278770446777, "rewards_train/2-2": 1.5198087692260742, "rewards_train/2-w": 0.9062832593917847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.373601198196411, "rewards_train/margins_1": 1.6799952983856201, "rewards_train/margins_2": 0.6135255098342896, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -163.75250244140625, "logps_train/policy_1_l": -154.5869903564453, "logps_train/policy_1_w": -112.04866027832031, "logps_train/policy_2_2": -132.95516967773438, "logps_train/policy_2_w": -134.0945281982422, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.6903746128082275, "rewards_train/1-l": -1.5614337921142578, "rewards_train/1-w": 1.8709158897399902, "rewards_train/2-2": 2.4951086044311523, "rewards_train/2-w": 0.8999216556549072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.432349681854248, "rewards_train/margins_1": 0.1805412769317627, "rewards_train/margins_2": 1.5951869487762451, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -116.74921417236328, "logps_train/policy_1_l": -127.0762939453125, "logps_train/policy_1_w": -112.52169799804688, "logps_train/policy_2_2": -99.35950469970703, "logps_train/policy_2_w": -134.28628540039062, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.0547658205032349, "rewards_train/1-l": -1.533801555633545, "rewards_train/1-w": 2.224393367767334, "rewards_train/2-2": 1.6687368154525757, "rewards_train/2-w": 1.38191819190979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.758194923400879, "rewards_train/margins_1": 1.1696275472640991, "rewards_train/margins_2": 0.28681862354278564, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -137.84622192382812, "logps_train/policy_1_l": -161.32424926757812, "logps_train/policy_1_w": -116.66542053222656, "logps_train/policy_2_2": -120.89263153076172, "logps_train/policy_2_w": -134.28347778320312, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.4239717721939087, "rewards_train/1-l": -1.676980972290039, "rewards_train/1-w": 2.121739149093628, "rewards_train/2-2": 1.8997989892959595, "rewards_train/2-w": 1.6982157230377197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.798720121383667, "rewards_train/margins_1": 0.6977673768997192, "rewards_train/margins_2": 0.20158326625823975, "step": 222 }, { "epoch": 0.67, "logps_train/policy_1_2": -152.53887939453125, "logps_train/policy_1_l": -99.37498474121094, "logps_train/policy_1_w": -104.51058959960938, "logps_train/policy_2_2": -115.56391906738281, "logps_train/policy_2_w": -127.15675354003906, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.9656426906585693, "rewards_train/1-l": -0.9015607237815857, "rewards_train/1-w": 2.047769069671631, "rewards_train/2-2": 2.516263961791992, "rewards_train/2-w": 1.3741686344146729, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9493297934532166, "rewards_train/margins_1": 1.0821263790130615, "rewards_train/margins_2": 1.1420953273773193, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -129.5140380859375, "logps_train/policy_1_l": -156.54815673828125, "logps_train/policy_1_w": -81.00947570800781, "logps_train/policy_2_2": -98.61917877197266, "logps_train/policy_2_w": -98.12312316894531, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.2017204761505127, "rewards_train/1-l": -2.8880176544189453, "rewards_train/1-w": 2.774052143096924, "rewards_train/2-2": 2.4857382774353027, "rewards_train/2-w": 2.2392501831054688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.662069797515869, "rewards_train/margins_1": 1.5723316669464111, "rewards_train/margins_2": 0.24648809432983398, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -139.7201385498047, "logps_train/policy_1_l": -99.33348083496094, "logps_train/policy_1_w": -89.15225982666016, "logps_train/policy_2_2": -115.24024963378906, "logps_train/policy_2_w": -112.80988311767578, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.7873610258102417, "rewards_train/1-l": -1.1731913089752197, "rewards_train/1-w": 1.8660237789154053, "rewards_train/2-2": 1.6181628704071045, "rewards_train/2-w": 1.4486991167068481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.039215087890625, "rewards_train/margins_1": 1.0786627531051636, "rewards_train/margins_2": 0.16946375370025635, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -112.03704833984375, "logps_train/policy_1_l": -156.65243530273438, "logps_train/policy_1_w": -93.51156616210938, "logps_train/policy_2_2": -85.54306030273438, "logps_train/policy_2_w": -111.92705535888672, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.0072332620620728, "rewards_train/1-l": -1.6930763721466064, "rewards_train/1-w": 1.8761868476867676, "rewards_train/2-2": 1.7519434690475464, "rewards_train/2-w": 1.292060375213623, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.569263219833374, "rewards_train/margins_1": 0.8689535856246948, "rewards_train/margins_2": 0.45988309383392334, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -130.0669708251953, "logps_train/policy_1_l": -171.447021484375, "logps_train/policy_1_w": -144.7747802734375, "logps_train/policy_2_2": -107.85771179199219, "logps_train/policy_2_w": -175.59463500976562, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.1245534420013428, "rewards_train/1-l": -1.2648197412490845, "rewards_train/1-w": 2.5381484031677246, "rewards_train/2-2": 1.7837598323822021, "rewards_train/2-w": 1.5374115705490112, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.802968144416809, "rewards_train/margins_1": 1.4135949611663818, "rewards_train/margins_2": 0.24634826183319092, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -171.778564453125, "logps_train/policy_1_l": -202.28355407714844, "logps_train/policy_1_w": -153.88143920898438, "logps_train/policy_2_2": -140.44845581054688, "logps_train/policy_2_w": -196.5004425048828, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.8487062454223633, "rewards_train/1-l": -1.947106122970581, "rewards_train/1-w": 2.969670057296753, "rewards_train/2-2": 2.632498264312744, "rewards_train/2-w": 1.7175339460372925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.916776180267334, "rewards_train/margins_1": 1.1209638118743896, "rewards_train/margins_2": 0.9149643182754517, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -122.06012725830078, "logps_train/policy_1_l": -155.41749572753906, "logps_train/policy_1_w": -125.64033508300781, "logps_train/policy_2_2": -89.00889587402344, "logps_train/policy_2_w": -166.43896484375, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 0.820549726486206, "rewards_train/1-l": -1.127687692642212, "rewards_train/1-w": 1.8136028051376343, "rewards_train/2-2": 1.8272356986999512, "rewards_train/2-w": 0.8786616325378418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.941290497779846, "rewards_train/margins_1": 0.9930530786514282, "rewards_train/margins_2": 0.9485740661621094, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -176.10150146484375, "logps_train/policy_1_l": -191.96194458007812, "logps_train/policy_1_w": -163.3349609375, "logps_train/policy_2_2": -148.63995361328125, "logps_train/policy_2_w": -187.4951629638672, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.5570372343063354, "rewards_train/1-l": -1.5282275676727295, "rewards_train/1-w": 2.166504383087158, "rewards_train/2-2": 2.418034553527832, "rewards_train/2-w": 1.2918908596038818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6947319507598877, "rewards_train/margins_1": 0.6094671487808228, "rewards_train/margins_2": 1.1261436939239502, "step": 223 }, { "epoch": 0.67, "learning_rate": 3.977444095126146e-06, "loss": 0.7585, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -264.9268798828125, "logps_train/policy_1_l": -166.96144104003906, "logps_train/policy_1_w": -183.64962768554688, "logps_train/policy_2_2": -223.04107666015625, "logps_train/policy_2_w": -243.15399169921875, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.4666885137557983, "rewards_train/1-l": -1.596144199371338, "rewards_train/1-w": 3.520975112915039, "rewards_train/2-2": 3.2708914279937744, "rewards_train/2-w": 1.5689752101898193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.117119312286377, "rewards_train/margins_1": 2.0542865991592407, "rewards_train/margins_2": 1.701916217803955, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -133.85125732421875, "logps_train/policy_1_l": -169.51876831054688, "logps_train/policy_1_w": -108.87178039550781, "logps_train/policy_2_2": -103.73304748535156, "logps_train/policy_2_w": -151.50704956054688, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.6453431844711304, "rewards_train/1-l": -1.8196499347686768, "rewards_train/1-w": 2.6253223419189453, "rewards_train/2-2": 1.596226692199707, "rewards_train/2-w": 1.269608497619629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.444972276687622, "rewards_train/margins_1": 1.979979157447815, "rewards_train/margins_2": 0.3266181945800781, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -118.5705795288086, "logps_train/policy_1_l": -143.26882934570312, "logps_train/policy_1_w": -132.7454376220703, "logps_train/policy_2_2": -91.92024230957031, "logps_train/policy_2_w": -179.7582550048828, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.8148171901702881, "rewards_train/1-l": -0.9030552506446838, "rewards_train/1-w": 2.1770191192626953, "rewards_train/2-2": 1.315007209777832, "rewards_train/2-w": 0.8554248809814453, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.080074369907379, "rewards_train/margins_1": 1.3622019290924072, "rewards_train/margins_2": 0.4595823287963867, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -203.52200317382812, "logps_train/policy_1_l": -222.99244689941406, "logps_train/policy_1_w": -106.0187759399414, "logps_train/policy_2_2": -169.44351196289062, "logps_train/policy_2_w": -132.6368865966797, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.4790486097335815, "rewards_train/1-l": -2.9023709297180176, "rewards_train/1-w": 1.9051539897918701, "rewards_train/2-2": 2.529086112976074, "rewards_train/2-w": 1.2183434963226318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.807524919509888, "rewards_train/margins_1": 0.4261053800582886, "rewards_train/margins_2": 1.3107426166534424, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -129.86912536621094, "logps_train/policy_1_l": -141.28524780273438, "logps_train/policy_1_w": -72.15202331542969, "logps_train/policy_2_2": -96.99468994140625, "logps_train/policy_2_w": -99.57167053222656, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.6755876541137695, "rewards_train/1-l": -1.6279377937316895, "rewards_train/1-w": 1.602717638015747, "rewards_train/2-2": 2.11928129196167, "rewards_train/2-w": 1.2802351713180542, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2306554317474365, "rewards_train/margins_1": 0.9271299839019775, "rewards_train/margins_2": 0.8390461206436157, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -188.63784790039062, "logps_train/policy_1_l": -122.15091705322266, "logps_train/policy_1_w": -121.45230102539062, "logps_train/policy_2_2": -145.04916381835938, "logps_train/policy_2_w": -154.1643829345703, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.904964804649353, "rewards_train/1-l": -1.5654339790344238, "rewards_train/1-w": 2.7934420108795166, "rewards_train/2-2": 3.8169589042663574, "rewards_train/2-w": 1.5874683856964111, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.35887598991394, "rewards_train/margins_1": 0.8884772062301636, "rewards_train/margins_2": 2.2294905185699463, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -164.33200073242188, "logps_train/policy_1_l": -135.3395233154297, "logps_train/policy_1_w": -111.67584228515625, "logps_train/policy_2_2": -144.0804443359375, "logps_train/policy_2_w": -129.11180114746094, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.6543008089065552, "rewards_train/1-l": -1.689812183380127, "rewards_train/1-w": 1.8214783668518066, "rewards_train/2-2": 2.270081043243408, "rewards_train/2-w": 1.3700700998306274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5112905502319336, "rewards_train/margins_1": 0.16717755794525146, "rewards_train/margins_2": 0.9000109434127808, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -174.29925537109375, "logps_train/policy_1_l": -117.95523834228516, "logps_train/policy_1_w": -158.40859985351562, "logps_train/policy_2_2": -146.185302734375, "logps_train/policy_2_w": -208.4228057861328, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.5966378450393677, "rewards_train/1-l": -1.1256015300750732, "rewards_train/1-w": 2.4392175674438477, "rewards_train/2-2": 2.3967032432556152, "rewards_train/2-w": 0.8280322551727295, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.564819097518921, "rewards_train/margins_1": 0.84257972240448, "rewards_train/margins_2": 1.5686709880828857, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -186.92697143554688, "logps_train/policy_1_l": -258.7618408203125, "logps_train/policy_1_w": -140.86373901367188, "logps_train/policy_2_2": -159.33209228515625, "logps_train/policy_2_w": -176.5977783203125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.5198032855987549, "rewards_train/1-l": -2.2027487754821777, "rewards_train/1-w": 3.095266342163086, "rewards_train/2-2": 2.369915008544922, "rewards_train/2-w": 2.1370983123779297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.298015117645264, "rewards_train/margins_1": 1.575463056564331, "rewards_train/margins_2": 0.2328166961669922, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -95.48457336425781, "logps_train/policy_1_l": -152.60772705078125, "logps_train/policy_1_w": -99.90472412109375, "logps_train/policy_2_2": -72.328857421875, "logps_train/policy_2_w": -117.84429931640625, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -86.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.0343542098999023, "rewards_train/1-l": -1.605425477027893, "rewards_train/1-w": 2.1649961471557617, "rewards_train/2-2": 1.3983638286590576, "rewards_train/2-w": 1.6202579736709595, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.770421624183655, "rewards_train/margins_1": 1.1306419372558594, "rewards_train/margins_2": -0.22189414501190186, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -100.95435333251953, "logps_train/policy_1_l": -172.32528686523438, "logps_train/policy_1_w": -119.14476013183594, "logps_train/policy_2_2": -79.7957992553711, "logps_train/policy_2_w": -150.06619262695312, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.0936273336410522, "rewards_train/1-l": -1.8922957181930542, "rewards_train/1-w": 2.8116955757141113, "rewards_train/2-2": 1.3883888721466064, "rewards_train/2-w": 1.441623568534851, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.7039912939071655, "rewards_train/margins_1": 1.718068242073059, "rewards_train/margins_2": -0.05323469638824463, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -164.74945068359375, "logps_train/policy_1_l": -206.38290405273438, "logps_train/policy_1_w": -121.628662109375, "logps_train/policy_2_2": -132.39695739746094, "logps_train/policy_2_w": -156.5106201171875, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.565680980682373, "rewards_train/1-l": -3.784580707550049, "rewards_train/1-w": 2.1566648483276367, "rewards_train/2-2": 2.7798349857330322, "rewards_train/2-w": 0.3333130180835724, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.9412455558776855, "rewards_train/margins_1": 0.5909838676452637, "rewards_train/margins_2": 2.44652196764946, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -154.52528381347656, "logps_train/policy_1_l": -128.1978759765625, "logps_train/policy_1_w": -123.94941711425781, "logps_train/policy_2_2": -128.33265686035156, "logps_train/policy_2_w": -144.76547241210938, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.251573085784912, "rewards_train/1-l": -0.84927898645401, "rewards_train/1-w": 1.700761318206787, "rewards_train/2-2": 1.7344093322753906, "rewards_train/2-w": 0.8586088418960571, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.550040304660797, "rewards_train/margins_1": 0.449188232421875, "rewards_train/margins_2": 0.8758004903793335, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -198.61257934570312, "logps_train/policy_1_l": -179.6851043701172, "logps_train/policy_1_w": -120.84033966064453, "logps_train/policy_2_2": -160.8412628173828, "logps_train/policy_2_w": -163.4164276123047, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5606173276901245, "rewards_train/1-l": -1.6935104131698608, "rewards_train/1-w": 3.000340700149536, "rewards_train/2-2": 3.1174371242523193, "rewards_train/2-w": 1.514607310295105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.693851113319397, "rewards_train/margins_1": 1.4397233724594116, "rewards_train/margins_2": 1.6028298139572144, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -153.66290283203125, "logps_train/policy_1_l": -278.8670654296875, "logps_train/policy_1_w": -171.6763916015625, "logps_train/policy_2_2": -120.02589416503906, "logps_train/policy_2_w": -224.77020263671875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.8122241497039795, "rewards_train/1-l": -1.8778185844421387, "rewards_train/1-w": 2.3756232261657715, "rewards_train/2-2": 2.4562973976135254, "rewards_train/2-w": 0.8852841854095459, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.25344181060791, "rewards_train/margins_1": 0.563399076461792, "rewards_train/margins_2": 1.5710132122039795, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -148.73031616210938, "logps_train/policy_1_l": -159.64846801757812, "logps_train/policy_1_w": -134.82533264160156, "logps_train/policy_2_2": -111.00770568847656, "logps_train/policy_2_w": -179.95687866210938, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.1832189559936523, "rewards_train/1-l": -1.5362346172332764, "rewards_train/1-w": 2.836216449737549, "rewards_train/2-2": 2.4617302417755127, "rewards_train/2-w": 1.226186752319336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.372451066970825, "rewards_train/margins_1": 1.6529974937438965, "rewards_train/margins_2": 1.2355434894561768, "step": 225 }, { "epoch": 0.68, "learning_rate": 3.957448697786181e-06, "loss": 0.7578, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -219.8865966796875, "logps_train/policy_1_l": -318.5106506347656, "logps_train/policy_1_w": -137.36148071289062, "logps_train/policy_2_2": -183.00250244140625, "logps_train/policy_2_w": -173.7638702392578, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.3894652128219604, "rewards_train/1-l": -3.9432530403137207, "rewards_train/1-w": 3.3513522148132324, "rewards_train/2-2": 2.795062780380249, "rewards_train/2-w": 2.2048630714416504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.294605255126953, "rewards_train/margins_1": 1.961887001991272, "rewards_train/margins_2": 0.5901997089385986, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -186.6264190673828, "logps_train/policy_1_l": -139.77403259277344, "logps_train/policy_1_w": -120.44197845458984, "logps_train/policy_2_2": -151.18057250976562, "logps_train/policy_2_w": -150.06324768066406, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.7647019624710083, "rewards_train/1-l": -1.169785976409912, "rewards_train/1-w": 1.7163496017456055, "rewards_train/2-2": 1.960067868232727, "rewards_train/2-w": 1.0280511379241943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8861355781555176, "rewards_train/margins_1": 0.9516476392745972, "rewards_train/margins_2": 0.9320167303085327, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -107.0548324584961, "logps_train/policy_1_l": -133.40061950683594, "logps_train/policy_1_w": -54.56776428222656, "logps_train/policy_2_2": -89.36199188232422, "logps_train/policy_2_w": -65.21650695800781, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 0.7445166110992432, "rewards_train/1-l": -0.8927968740463257, "rewards_train/1-w": 1.1502552032470703, "rewards_train/2-2": 1.4966130256652832, "rewards_train/2-w": 0.8013967275619507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.043052077293396, "rewards_train/margins_1": 0.40573859214782715, "rewards_train/margins_2": 0.6952162981033325, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -201.7934112548828, "logps_train/policy_1_l": -202.53846740722656, "logps_train/policy_1_w": -160.01077270507812, "logps_train/policy_2_2": -160.7057342529297, "logps_train/policy_2_w": -209.67491149902344, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 1.4487841129302979, "rewards_train/1-l": -1.3804099559783936, "rewards_train/1-w": 2.412984609603882, "rewards_train/2-2": 2.7169265747070312, "rewards_train/2-w": 1.1575086116790771, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7933945655822754, "rewards_train/margins_1": 0.964200496673584, "rewards_train/margins_2": 1.559417963027954, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -140.53652954101562, "logps_train/policy_1_l": -224.27442932128906, "logps_train/policy_1_w": -136.95704650878906, "logps_train/policy_2_2": -101.34913635253906, "logps_train/policy_2_w": -165.42103576660156, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.9619722962379456, "rewards_train/1-l": -2.5930676460266113, "rewards_train/1-w": 2.1886708736419678, "rewards_train/2-2": 1.9650859832763672, "rewards_train/2-w": 1.2610219717025757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.781738519668579, "rewards_train/margins_1": 1.2266985774040222, "rewards_train/margins_2": 0.7040640115737915, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -186.83599853515625, "logps_train/policy_1_l": -154.29708862304688, "logps_train/policy_1_w": -176.4947052001953, "logps_train/policy_2_2": -147.67135620117188, "logps_train/policy_2_w": -218.7908935546875, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.8132753372192383, "rewards_train/1-l": -1.655490756034851, "rewards_train/1-w": 2.8177170753479004, "rewards_train/2-2": 3.0594258308410645, "rewards_train/2-w": 0.9966920018196106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4732078313827515, "rewards_train/margins_1": 1.004441738128662, "rewards_train/margins_2": 2.062733829021454, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -125.96369934082031, "logps_train/policy_1_l": -173.06895446777344, "logps_train/policy_1_w": -95.88105773925781, "logps_train/policy_2_2": -105.83795166015625, "logps_train/policy_2_w": -119.28146362304688, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.5902504920959473, "rewards_train/1-l": -2.5503525733947754, "rewards_train/1-w": 1.8048624992370605, "rewards_train/2-2": 2.2565858364105225, "rewards_train/2-w": 0.9257599711418152, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.355215072631836, "rewards_train/margins_1": 0.21461200714111328, "rewards_train/margins_2": 1.3308258652687073, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -181.98666381835938, "logps_train/policy_1_l": -168.78443908691406, "logps_train/policy_1_w": -150.45664978027344, "logps_train/policy_2_2": -137.5343780517578, "logps_train/policy_2_w": -188.66519165039062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.2513335943222046, "rewards_train/1-l": -0.05754488706588745, "rewards_train/1-w": 2.929335355758667, "rewards_train/2-2": 2.8121867179870605, "rewards_train/2-w": 1.8647311925888062, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9868802428245544, "rewards_train/margins_1": 1.6780017614364624, "rewards_train/margins_2": 0.9474555253982544, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -261.4613952636719, "logps_train/policy_1_l": -252.35476684570312, "logps_train/policy_1_w": -196.27679443359375, "logps_train/policy_2_2": -221.7918701171875, "logps_train/policy_2_w": -256.9516906738281, "logps_train/ref_1_2": -284.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -258.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.1913604736328125, "rewards_train/1-l": -2.16926646232605, "rewards_train/1-w": 2.3953676223754883, "rewards_train/2-2": 3.57706356048584, "rewards_train/2-w": 0.8899871706962585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.564634084701538, "rewards_train/margins_1": 0.20400714874267578, "rewards_train/margins_2": 2.6870763897895813, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -171.46035766601562, "logps_train/policy_1_l": -150.08798217773438, "logps_train/policy_1_w": -122.29042053222656, "logps_train/policy_2_2": -143.6392364501953, "logps_train/policy_2_w": -145.55589294433594, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 2.135214328765869, "rewards_train/1-l": -1.4072359800338745, "rewards_train/1-w": 2.6193952560424805, "rewards_train/2-2": 3.164201498031616, "rewards_train/2-w": 1.8350359201431274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.026631236076355, "rewards_train/margins_1": 0.48418092727661133, "rewards_train/margins_2": 1.3291655778884888, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -202.26004028320312, "logps_train/policy_1_l": -147.02056884765625, "logps_train/policy_1_w": -152.6732177734375, "logps_train/policy_2_2": -167.4915008544922, "logps_train/policy_2_w": -180.16769409179688, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.670577883720398, "rewards_train/1-l": -0.8065484166145325, "rewards_train/1-w": 2.5683226585388184, "rewards_train/2-2": 2.979365587234497, "rewards_train/2-w": 1.7461216449737549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.374871075153351, "rewards_train/margins_1": 0.8977447748184204, "rewards_train/margins_2": 1.2332439422607422, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -148.80418395996094, "logps_train/policy_1_l": -193.31455993652344, "logps_train/policy_1_w": -158.90557861328125, "logps_train/policy_2_2": -116.84700012207031, "logps_train/policy_2_w": -203.116455078125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.4164564609527588, "rewards_train/1-l": -2.0843849182128906, "rewards_train/1-w": 3.5453803539276123, "rewards_train/2-2": 2.1090500354766846, "rewards_train/2-w": 2.210228681564331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.629765272140503, "rewards_train/margins_1": 2.1289238929748535, "rewards_train/margins_2": -0.10117864608764648, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -209.8982391357422, "logps_train/policy_1_l": -218.64361572265625, "logps_train/policy_1_w": -143.21400451660156, "logps_train/policy_2_2": -168.85440063476562, "logps_train/policy_2_w": -183.19081115722656, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.221114158630371, "rewards_train/1-l": -2.5065479278564453, "rewards_train/1-w": 2.219224691390991, "rewards_train/2-2": 2.502840518951416, "rewards_train/2-w": 1.1152936220169067, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7257726192474365, "rewards_train/margins_1": 0.9981105327606201, "rewards_train/margins_2": 1.3875468969345093, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -159.7447052001953, "logps_train/policy_1_l": -182.27732849121094, "logps_train/policy_1_w": -123.20219421386719, "logps_train/policy_2_2": -124.92882537841797, "logps_train/policy_2_w": -151.1688232421875, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.9204509258270264, "rewards_train/1-l": -1.3082014322280884, "rewards_train/1-w": 1.768257737159729, "rewards_train/2-2": 2.3989145755767822, "rewards_train/2-w": 1.1604621410369873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0764591693878174, "rewards_train/margins_1": -0.15219318866729736, "rewards_train/margins_2": 1.238452434539795, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -127.30154418945312, "logps_train/policy_1_l": -117.9140853881836, "logps_train/policy_1_w": -132.47830200195312, "logps_train/policy_2_2": -104.61382293701172, "logps_train/policy_2_w": -173.6190948486328, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.9573454856872559, "rewards_train/1-l": -1.0835962295532227, "rewards_train/1-w": 2.328733444213867, "rewards_train/2-2": 1.56361722946167, "rewards_train/2-w": 1.0396530628204346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.41232967376709, "rewards_train/margins_1": 1.3713879585266113, "rewards_train/margins_2": 0.5239641666412354, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -142.69834899902344, "logps_train/policy_1_l": -187.51058959960938, "logps_train/policy_1_w": -161.8949737548828, "logps_train/policy_2_2": -116.03099060058594, "logps_train/policy_2_w": -197.2129364013672, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.7379778027534485, "rewards_train/1-l": -1.994175910949707, "rewards_train/1-w": 2.1712450981140137, "rewards_train/2-2": 1.1457295417785645, "rewards_train/2-w": 1.2091755867004395, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.165421009063721, "rewards_train/margins_1": 1.4332672953605652, "rewards_train/margins_2": -0.063446044921875, "step": 227 }, { "epoch": 0.68, "learning_rate": 3.937311056022634e-06, "loss": 0.719, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -120.26889038085938, "logps_train/policy_1_l": -154.02487182617188, "logps_train/policy_1_w": -101.9322280883789, "logps_train/policy_2_2": -102.34211730957031, "logps_train/policy_2_w": -128.54287719726562, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 2.1293606758117676, "rewards_train/1-l": -1.4876927137374878, "rewards_train/1-w": 1.279726505279541, "rewards_train/2-2": 2.534538745880127, "rewards_train/2-w": 0.5273518562316895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.767419219017029, "rewards_train/margins_1": -0.8496341705322266, "rewards_train/margins_2": 2.0071868896484375, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -180.33827209472656, "logps_train/policy_1_l": -196.19699096679688, "logps_train/policy_1_w": -123.29116821289062, "logps_train/policy_2_2": -147.93145751953125, "logps_train/policy_2_w": -148.20083618164062, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.0958616733551025, "rewards_train/1-l": -0.8655964136123657, "rewards_train/1-w": 2.031820774078369, "rewards_train/2-2": 2.5732603073120117, "rewards_train/2-w": 1.4799175262451172, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.897417187690735, "rewards_train/margins_1": 0.9359591007232666, "rewards_train/margins_2": 1.0933427810668945, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -131.255615234375, "logps_train/policy_1_l": -188.1104736328125, "logps_train/policy_1_w": -104.73804473876953, "logps_train/policy_2_2": -96.14854431152344, "logps_train/policy_2_w": -146.39137268066406, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.449828863143921, "rewards_train/1-l": -1.7571401596069336, "rewards_train/1-w": 1.9039301872253418, "rewards_train/2-2": 2.463514566421509, "rewards_train/2-w": 0.7155505418777466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6610703468322754, "rewards_train/margins_1": 0.4541013240814209, "rewards_train/margins_2": 1.7479640245437622, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -136.30892944335938, "logps_train/policy_1_l": -194.7490692138672, "logps_train/policy_1_w": -152.29132080078125, "logps_train/policy_2_2": -125.24009704589844, "logps_train/policy_2_w": -166.6922607421875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 0.8694973587989807, "rewards_train/1-l": -2.3965859413146973, "rewards_train/1-w": 1.7552430629730225, "rewards_train/2-2": 1.1055793762207031, "rewards_train/2-w": 1.3979623317718506, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.15182900428772, "rewards_train/margins_1": 0.8857457041740417, "rewards_train/margins_2": -0.29238295555114746, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -171.8744659423828, "logps_train/policy_1_l": -144.71646118164062, "logps_train/policy_1_w": -170.69650268554688, "logps_train/policy_2_2": -137.9044189453125, "logps_train/policy_2_w": -228.6730499267578, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.0074750185012817, "rewards_train/1-l": -1.7663735151290894, "rewards_train/1-w": 3.1397244930267334, "rewards_train/2-2": 2.1494014263153076, "rewards_train/2-w": 1.038944125175476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.906098008155823, "rewards_train/margins_1": 2.1322494745254517, "rewards_train/margins_2": 1.1104573011398315, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -244.8254852294922, "logps_train/policy_1_l": -211.18594360351562, "logps_train/policy_1_w": -153.59063720703125, "logps_train/policy_2_2": -200.03103637695312, "logps_train/policy_2_w": -198.59213256835938, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6080756187438965, "rewards_train/1-l": -1.4935939311981201, "rewards_train/1-w": 2.678436279296875, "rewards_train/2-2": 3.278146266937256, "rewards_train/2-w": 1.112661361694336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.172030210494995, "rewards_train/margins_1": 1.0703606605529785, "rewards_train/margins_2": 2.16548490524292, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -196.24893188476562, "logps_train/policy_1_l": -248.30572509765625, "logps_train/policy_1_w": -155.27239990234375, "logps_train/policy_2_2": -158.03878784179688, "logps_train/policy_2_w": -192.35670471191406, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.20870041847229, "rewards_train/1-l": -2.5696358680725098, "rewards_train/1-w": 2.3159239292144775, "rewards_train/2-2": 2.332841396331787, "rewards_train/2-w": 1.3600327968597412, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.885559797286987, "rewards_train/margins_1": 1.1072235107421875, "rewards_train/margins_2": 0.9728085994720459, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -94.87637329101562, "logps_train/policy_1_l": -91.65281677246094, "logps_train/policy_1_w": -58.13316345214844, "logps_train/policy_2_2": -70.32533264160156, "logps_train/policy_2_w": -83.52961730957031, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.0842373371124268, "rewards_train/1-l": -1.069578766822815, "rewards_train/1-w": 1.7196917533874512, "rewards_train/2-2": 1.538560152053833, "rewards_train/2-w": 0.9329755306243896, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.789270520210266, "rewards_train/margins_1": 0.6354544162750244, "rewards_train/margins_2": 0.6055846214294434, "step": 228 }, { "epoch": 0.69, "logps_train/policy_1_2": -79.18937683105469, "logps_train/policy_1_l": -76.53738403320312, "logps_train/policy_1_w": -68.7989730834961, "logps_train/policy_2_2": -58.787052154541016, "logps_train/policy_2_w": -99.78952026367188, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 1.1802809238433838, "rewards_train/1-l": -0.8863551616668701, "rewards_train/1-w": 1.7880712747573853, "rewards_train/2-2": 1.677740216255188, "rewards_train/2-w": 0.6812039613723755, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.6744264364242554, "rewards_train/margins_1": 0.6077903509140015, "rewards_train/margins_2": 0.9965362548828125, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -189.84686279296875, "logps_train/policy_1_l": -152.24386596679688, "logps_train/policy_1_w": -101.10382080078125, "logps_train/policy_2_2": -150.71401977539062, "logps_train/policy_2_w": -126.62056732177734, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9198054075241089, "rewards_train/1-l": -1.352510929107666, "rewards_train/1-w": 1.2943060398101807, "rewards_train/2-2": 2.146176815032959, "rewards_train/2-w": 0.5770057439804077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6468169689178467, "rewards_train/margins_1": 0.3745006322860718, "rewards_train/margins_2": 1.5691710710525513, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -156.5076446533203, "logps_train/policy_1_l": -145.62307739257812, "logps_train/policy_1_w": -126.78594970703125, "logps_train/policy_2_2": -123.75588989257812, "logps_train/policy_2_w": -156.56402587890625, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.960172414779663, "rewards_train/1-l": -1.127150535583496, "rewards_train/1-w": 2.3882012367248535, "rewards_train/2-2": 2.8994107246398926, "rewards_train/2-w": 1.2287540435791016, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5153517723083496, "rewards_train/margins_1": 0.42802882194519043, "rewards_train/margins_2": 1.670656681060791, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -183.11007690429688, "logps_train/policy_1_l": -132.40345764160156, "logps_train/policy_1_w": -139.05435180664062, "logps_train/policy_2_2": -136.5093994140625, "logps_train/policy_2_w": -191.40167236328125, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.9764910936355591, "rewards_train/1-l": -2.004408359527588, "rewards_train/1-w": 2.6187829971313477, "rewards_train/2-2": 2.486560344696045, "rewards_train/2-w": 0.8238941431045532, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6231913566589355, "rewards_train/margins_1": 1.6422919034957886, "rewards_train/margins_2": 1.6626662015914917, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -138.77886962890625, "logps_train/policy_1_l": -132.13095092773438, "logps_train/policy_1_w": -81.19561767578125, "logps_train/policy_2_2": -116.27935791015625, "logps_train/policy_2_w": -111.6263427734375, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.5408638119697571, "rewards_train/1-l": -1.125985860824585, "rewards_train/1-w": 1.4616875648498535, "rewards_train/2-2": 1.3070250749588013, "rewards_train/2-w": 0.9217409491539001, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.5876734256744385, "rewards_train/margins_1": 0.9208237528800964, "rewards_train/margins_2": 0.3852841258049011, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -116.10423278808594, "logps_train/policy_1_l": -150.4484405517578, "logps_train/policy_1_w": -53.815277099609375, "logps_train/policy_2_2": -90.83948516845703, "logps_train/policy_2_w": -75.09024047851562, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 0.38488948345184326, "rewards_train/1-l": -2.426924228668213, "rewards_train/1-w": 1.5077301263809204, "rewards_train/2-2": 1.0508172512054443, "rewards_train/2-w": 1.2409756183624268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.9346543550491333, "rewards_train/margins_1": 1.1228406429290771, "rewards_train/margins_2": -0.19015836715698242, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -131.93032836914062, "logps_train/policy_1_l": -113.13896179199219, "logps_train/policy_1_w": -125.13456726074219, "logps_train/policy_2_2": -103.32118225097656, "logps_train/policy_2_w": -155.2357635498047, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.0163413286209106, "rewards_train/1-l": -1.4736618995666504, "rewards_train/1-w": 2.419355869293213, "rewards_train/2-2": 2.005772113800049, "rewards_train/2-w": 1.519391417503357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8930177688598633, "rewards_train/margins_1": 1.4030145406723022, "rewards_train/margins_2": 0.4863806962966919, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -59.961570739746094, "logps_train/policy_1_l": -56.47775650024414, "logps_train/policy_1_w": -55.220008850097656, "logps_train/policy_2_2": -36.104923248291016, "logps_train/policy_2_w": -78.18270874023438, "logps_train/ref_1_2": -64.5, "logps_train/ref_1_l": -47.75, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -46.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.48196765780448914, "rewards_train/1-l": -0.8655489683151245, "rewards_train/1-w": 1.321358323097229, "rewards_train/2-2": 0.9840388298034668, "rewards_train/2-w": 0.518447756767273, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1869072914123535, "rewards_train/margins_1": 0.8393906652927399, "rewards_train/margins_2": 0.46559107303619385, "step": 229 }, { "epoch": 0.69, "learning_rate": 3.917033135233845e-06, "loss": 0.8175, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -118.323974609375, "logps_train/policy_1_l": -90.826904296875, "logps_train/policy_1_w": -74.68944549560547, "logps_train/policy_2_2": -92.85826873779297, "logps_train/policy_2_w": -88.43209075927734, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.15510261058807373, "rewards_train/1-l": -0.8998786211013794, "rewards_train/1-w": 0.8404303789138794, "rewards_train/2-2": 1.173547625541687, "rewards_train/2-w": 0.36665403842926025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7403090000152588, "rewards_train/margins_1": 0.6853277683258057, "rewards_train/margins_2": 0.8068935871124268, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -181.20042419433594, "logps_train/policy_1_l": -156.38253784179688, "logps_train/policy_1_w": -147.52847290039062, "logps_train/policy_2_2": -152.06031799316406, "logps_train/policy_2_w": -176.6483154296875, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5612068176269531, "rewards_train/1-l": -1.8048547506332397, "rewards_train/1-w": 1.8862152099609375, "rewards_train/2-2": 2.693967819213867, "rewards_train/2-w": 0.4703250229358673, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6910699605941772, "rewards_train/margins_1": 0.3250083923339844, "rewards_train/margins_2": 2.223642796278, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -89.72016906738281, "logps_train/policy_1_l": -49.88343048095703, "logps_train/policy_1_w": -51.89374542236328, "logps_train/policy_2_2": -75.21940612792969, "logps_train/policy_2_w": -66.32465362548828, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -43.0, "logps_train/ref_1_w": -61.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.968608021736145, "rewards_train/1-l": -0.6874637603759766, "rewards_train/1-w": 0.9334772825241089, "rewards_train/2-2": 1.4899728298187256, "rewards_train/2-w": 0.3903864622116089, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.6209410429000854, "rewards_train/margins_1": -0.03513073921203613, "rewards_train/margins_2": 1.0995863676071167, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -226.95745849609375, "logps_train/policy_1_l": -223.00418090820312, "logps_train/policy_1_w": -172.20408630371094, "logps_train/policy_2_2": -180.55010986328125, "logps_train/policy_2_w": -227.74868774414062, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.108940601348877, "rewards_train/1-l": -2.911355495452881, "rewards_train/1-w": 3.0803725719451904, "rewards_train/2-2": 2.9074883460998535, "rewards_train/2-w": 1.0423192977905273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.991728067398071, "rewards_train/margins_1": 1.9714319705963135, "rewards_train/margins_2": 1.8651690483093262, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -123.98373413085938, "logps_train/policy_1_l": -173.26666259765625, "logps_train/policy_1_w": -51.64581298828125, "logps_train/policy_2_2": -89.31065368652344, "logps_train/policy_2_w": -73.4362564086914, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.4651280641555786, "rewards_train/1-l": -2.748126983642578, "rewards_train/1-w": 1.250091552734375, "rewards_train/2-2": 1.6329967975616455, "rewards_train/2-w": 0.665749192237854, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.998218536376953, "rewards_train/margins_1": 0.7849634885787964, "rewards_train/margins_2": 0.9672476053237915, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -159.600341796875, "logps_train/policy_1_l": -177.67385864257812, "logps_train/policy_1_w": -110.99217987060547, "logps_train/policy_2_2": -125.26028442382812, "logps_train/policy_2_w": -137.07154846191406, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.433715581893921, "rewards_train/1-l": -0.5529307126998901, "rewards_train/1-w": 1.7000007629394531, "rewards_train/2-2": 2.442721366882324, "rewards_train/2-w": 1.1115951538085938, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2529314756393433, "rewards_train/margins_1": 0.2662851810455322, "rewards_train/margins_2": 1.3311262130737305, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -200.82281494140625, "logps_train/policy_1_l": -152.8873291015625, "logps_train/policy_1_w": -162.13116455078125, "logps_train/policy_2_2": -151.567626953125, "logps_train/policy_2_w": -210.02218627929688, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.7388122081756592, "rewards_train/1-l": -1.457872986793518, "rewards_train/1-w": 3.266571044921875, "rewards_train/2-2": 3.5572991371154785, "rewards_train/2-w": 1.2493430376052856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.724444031715393, "rewards_train/margins_1": 1.5277588367462158, "rewards_train/margins_2": 2.307956099510193, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -96.21722412109375, "logps_train/policy_1_l": -123.8119125366211, "logps_train/policy_1_w": -88.91687774658203, "logps_train/policy_2_2": -77.65664672851562, "logps_train/policy_2_w": -109.85751342773438, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 0.35166651010513306, "rewards_train/1-l": -0.3130273222923279, "rewards_train/1-w": 1.8536245822906494, "rewards_train/2-2": 0.7309167385101318, "rewards_train/2-w": 1.2845613956451416, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1666519045829773, "rewards_train/margins_1": 1.5019580721855164, "rewards_train/margins_2": -0.5536446571350098, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -186.6899871826172, "logps_train/policy_1_l": -222.33480834960938, "logps_train/policy_1_w": -123.72016906738281, "logps_train/policy_2_2": -159.64675903320312, "logps_train/policy_2_w": -148.68179321289062, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.356001853942871, "rewards_train/1-l": -3.1967618465423584, "rewards_train/1-w": 2.573296070098877, "rewards_train/2-2": 2.4587621688842773, "rewards_train/2-w": 1.842759132385254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.770057916641235, "rewards_train/margins_1": 1.2172942161560059, "rewards_train/margins_2": 0.6160030364990234, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -207.3644256591797, "logps_train/policy_1_l": -162.08152770996094, "logps_train/policy_1_w": -142.04348754882812, "logps_train/policy_2_2": -150.43984985351562, "logps_train/policy_2_w": -198.82656860351562, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.35183945298194885, "rewards_train/1-l": -1.3394029140472412, "rewards_train/1-w": 2.1065874099731445, "rewards_train/2-2": 2.304647922515869, "rewards_train/2-w": 0.8814060688018799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4459903240203857, "rewards_train/margins_1": 1.7547479569911957, "rewards_train/margins_2": 1.4232418537139893, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -170.2786865234375, "logps_train/policy_1_l": -149.653564453125, "logps_train/policy_1_w": -116.9759521484375, "logps_train/policy_2_2": -134.63467407226562, "logps_train/policy_2_w": -153.7635040283203, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.0127569437026978, "rewards_train/1-l": -1.5493416786193848, "rewards_train/1-w": 2.6274046897888184, "rewards_train/2-2": 2.1505954265594482, "rewards_train/2-w": 1.8564622402191162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.176746368408203, "rewards_train/margins_1": 1.6146477460861206, "rewards_train/margins_2": 0.29413318634033203, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -135.3009033203125, "logps_train/policy_1_l": -130.5619659423828, "logps_train/policy_1_w": -160.314208984375, "logps_train/policy_2_2": -112.37393188476562, "logps_train/policy_2_w": -187.27102661132812, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.9169804453849792, "rewards_train/1-l": -1.373384714126587, "rewards_train/1-w": 3.385375499725342, "rewards_train/2-2": 1.5290132761001587, "rewards_train/2-w": 2.419771909713745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.758760213851929, "rewards_train/margins_1": 2.4683950543403625, "rewards_train/margins_2": -0.8907586336135864, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -109.18820190429688, "logps_train/policy_1_l": -125.07659912109375, "logps_train/policy_1_w": -182.09548950195312, "logps_train/policy_2_2": -76.73208618164062, "logps_train/policy_2_w": -239.51913452148438, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.1343050003051758, "rewards_train/1-l": -1.48305082321167, "rewards_train/1-w": 1.602949619293213, "rewards_train/2-2": 1.7510102987289429, "rewards_train/2-w": -0.09332139790058136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.086000442504883, "rewards_train/margins_1": 0.4686446189880371, "rewards_train/margins_2": 1.8443316966295242, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -116.72823333740234, "logps_train/policy_1_l": -90.35981750488281, "logps_train/policy_1_w": -103.39573669433594, "logps_train/policy_2_2": -81.34870910644531, "logps_train/policy_2_w": -143.66073608398438, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.7006136178970337, "rewards_train/1-l": -1.2359812259674072, "rewards_train/1-w": 1.9196064472198486, "rewards_train/2-2": 2.016301155090332, "rewards_train/2-w": 0.5901753902435303, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.155587673187256, "rewards_train/margins_1": 1.218992829322815, "rewards_train/margins_2": 1.4261257648468018, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -93.48504638671875, "logps_train/policy_1_l": -119.5262680053711, "logps_train/policy_1_w": -98.73273468017578, "logps_train/policy_2_2": -74.43682098388672, "logps_train/policy_2_w": -133.26266479492188, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.6524718403816223, "rewards_train/1-l": -1.3436423540115356, "rewards_train/1-w": 1.6782889366149902, "rewards_train/2-2": 1.0182321071624756, "rewards_train/2-w": 0.44092175364494324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.021931290626526, "rewards_train/margins_1": 1.025817096233368, "rewards_train/margins_2": 0.5773103535175323, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -142.3033447265625, "logps_train/policy_1_l": -135.0736083984375, "logps_train/policy_1_w": -106.29536437988281, "logps_train/policy_2_2": -114.70521545410156, "logps_train/policy_2_w": -140.550537109375, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.9009143114089966, "rewards_train/1-l": -1.430016040802002, "rewards_train/1-w": 1.736088514328003, "rewards_train/2-2": 1.7044780254364014, "rewards_train/2-w": 0.8543221354484558, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.166104555130005, "rewards_train/margins_1": 0.8351742029190063, "rewards_train/margins_2": 0.8501558899879456, "step": 231 }, { "epoch": 0.69, "learning_rate": 3.8966169145091314e-06, "loss": 0.8226, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -167.7669677734375, "logps_train/policy_1_l": -141.070068359375, "logps_train/policy_1_w": -145.6334686279297, "logps_train/policy_2_2": -126.66474151611328, "logps_train/policy_2_w": -174.9671173095703, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5545533895492554, "rewards_train/1-l": -0.7386361360549927, "rewards_train/1-w": 1.4065866470336914, "rewards_train/2-2": 2.522587537765503, "rewards_train/2-w": 0.6376639604568481, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.145222783088684, "rewards_train/margins_1": -0.14796674251556396, "rewards_train/margins_2": 1.8849235773086548, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -145.55047607421875, "logps_train/policy_1_l": -167.33132934570312, "logps_train/policy_1_w": -121.8636245727539, "logps_train/policy_2_2": -120.70631408691406, "logps_train/policy_2_w": -147.00267028808594, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.8230769038200378, "rewards_train/1-l": -1.6128201484680176, "rewards_train/1-w": 2.6948866844177246, "rewards_train/2-2": 1.8982412815093994, "rewards_train/2-w": 2.2872328758239746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.307706832885742, "rewards_train/margins_1": 1.8718097805976868, "rewards_train/margins_2": -0.3889915943145752, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -120.78514099121094, "logps_train/policy_1_l": -107.02294158935547, "logps_train/policy_1_w": -90.43348693847656, "logps_train/policy_2_2": -93.95542907714844, "logps_train/policy_2_w": -112.80267333984375, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.3667981028556824, "rewards_train/1-l": -1.319482445716858, "rewards_train/1-w": 1.4804790019989014, "rewards_train/2-2": 1.163050651550293, "rewards_train/2-w": 0.7884824275970459, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7999614477157593, "rewards_train/margins_1": 1.113680899143219, "rewards_train/margins_2": 0.37456822395324707, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -150.4174041748047, "logps_train/policy_1_l": -179.5414276123047, "logps_train/policy_1_w": -132.7174072265625, "logps_train/policy_2_2": -122.90973663330078, "logps_train/policy_2_w": -170.035888671875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.9531813859939575, "rewards_train/1-l": -1.96513032913208, "rewards_train/1-w": 2.01116943359375, "rewards_train/2-2": 1.8328546285629272, "rewards_train/2-w": 0.992113471031189, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.97629976272583, "rewards_train/margins_1": 1.0579880475997925, "rewards_train/margins_2": 0.8407411575317383, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -218.9124298095703, "logps_train/policy_1_l": -219.87664794921875, "logps_train/policy_1_w": -151.41363525390625, "logps_train/policy_2_2": -189.75332641601562, "logps_train/policy_2_w": -184.82394409179688, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 0.8321458101272583, "rewards_train/1-l": -1.8181343078613281, "rewards_train/1-w": 2.3258230686187744, "rewards_train/2-2": 1.6514264345169067, "rewards_train/2-w": 0.8597927689552307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.1439573764801025, "rewards_train/margins_1": 1.4936772584915161, "rewards_train/margins_2": 0.791633665561676, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -177.836181640625, "logps_train/policy_1_l": -156.58157348632812, "logps_train/policy_1_w": -110.86968231201172, "logps_train/policy_2_2": -148.01934814453125, "logps_train/policy_2_w": -144.5366668701172, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.7195063829421997, "rewards_train/1-l": -1.7417521476745605, "rewards_train/1-w": 2.0435004234313965, "rewards_train/2-2": 1.9793137311935425, "rewards_train/2-w": 1.229926347732544, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.785252571105957, "rewards_train/margins_1": 1.3239940404891968, "rewards_train/margins_2": 0.7493873834609985, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -95.93183898925781, "logps_train/policy_1_l": -84.00906372070312, "logps_train/policy_1_w": -106.21768188476562, "logps_train/policy_2_2": -74.76138305664062, "logps_train/policy_2_w": -135.04933166503906, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.045096710324287415, "rewards_train/1-l": -0.4762967824935913, "rewards_train/1-w": 1.6766693592071533, "rewards_train/2-2": 0.555893063545227, "rewards_train/2-w": 0.46381711959838867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1529661417007446, "rewards_train/margins_1": 1.631572648882866, "rewards_train/margins_2": 0.09207594394683838, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -242.74087524414062, "logps_train/policy_1_l": -203.13235473632812, "logps_train/policy_1_w": -153.1788330078125, "logps_train/policy_2_2": -177.35105895996094, "logps_train/policy_2_w": -201.81214904785156, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.6071628332138062, "rewards_train/1-l": -2.7730021476745605, "rewards_train/1-w": 1.9180532693862915, "rewards_train/2-2": 2.244582176208496, "rewards_train/2-w": 0.21878540515899658, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.691055417060852, "rewards_train/margins_1": 1.3108904361724854, "rewards_train/margins_2": 2.0257967710494995, "step": 232 }, { "epoch": 0.7, "logps_train/policy_1_2": -193.4351348876953, "logps_train/policy_1_l": -145.61605834960938, "logps_train/policy_1_w": -150.0279541015625, "logps_train/policy_2_2": -156.49078369140625, "logps_train/policy_2_w": -178.6318359375, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 0.5924248695373535, "rewards_train/1-l": -1.332504153251648, "rewards_train/1-w": 1.697595477104187, "rewards_train/2-2": 1.8634207248687744, "rewards_train/2-w": 1.223536729812622, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.030099630355835, "rewards_train/margins_1": 1.1051706075668335, "rewards_train/margins_2": 0.6398839950561523, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -147.87937927246094, "logps_train/policy_1_l": -213.8034210205078, "logps_train/policy_1_w": -170.83926391601562, "logps_train/policy_2_2": -117.31968688964844, "logps_train/policy_2_w": -221.32553100585938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.0245616436004639, "rewards_train/1-l": -2.9053423404693604, "rewards_train/1-w": 3.1160731315612793, "rewards_train/2-2": 1.8367815017700195, "rewards_train/2-w": 1.215882658958435, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.02141547203064, "rewards_train/margins_1": 2.0915114879608154, "rewards_train/margins_2": 0.6208988428115845, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -95.90213775634766, "logps_train/policy_1_l": -122.54019927978516, "logps_train/policy_1_w": -133.26657104492188, "logps_train/policy_2_2": -77.05043029785156, "logps_train/policy_2_w": -169.29844665527344, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.0566608905792236, "rewards_train/1-l": -1.0895665884017944, "rewards_train/1-w": 1.563966989517212, "rewards_train/2-2": 1.5660510063171387, "rewards_train/2-w": 0.45296788215637207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.6535335779190063, "rewards_train/margins_1": 0.5073060989379883, "rewards_train/margins_2": 1.1130831241607666, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -159.23941040039062, "logps_train/policy_1_l": -151.53610229492188, "logps_train/policy_1_w": -118.00330352783203, "logps_train/policy_2_2": -132.66896057128906, "logps_train/policy_2_w": -145.167724609375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3080897331237793, "rewards_train/1-l": -1.3899391889572144, "rewards_train/1-w": 2.3008413314819336, "rewards_train/2-2": 2.293260097503662, "rewards_train/2-w": 1.4357657432556152, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.690780520439148, "rewards_train/margins_1": 0.9927515983581543, "rewards_train/margins_2": 0.8574943542480469, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -140.18594360351562, "logps_train/policy_1_l": -226.94821166992188, "logps_train/policy_1_w": -118.03938293457031, "logps_train/policy_2_2": -111.1903076171875, "logps_train/policy_2_w": -149.25967407226562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.1321873664855957, "rewards_train/1-l": -2.2916972637176514, "rewards_train/1-w": 2.1343436241149902, "rewards_train/2-2": 1.760265588760376, "rewards_train/2-w": 1.2865333557128906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.426040887832642, "rewards_train/margins_1": 1.0021562576293945, "rewards_train/margins_2": 0.47373223304748535, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -48.060813903808594, "logps_train/policy_1_l": -50.78591537475586, "logps_train/policy_1_w": -54.04216766357422, "logps_train/policy_2_2": -38.7186393737793, "logps_train/policy_2_w": -69.58479309082031, "logps_train/ref_1_2": -52.25, "logps_train/ref_1_l": -46.75, "logps_train/ref_1_w": -63.75, "logps_train/ref_2_2": -45.25, "logps_train/ref_2_w": -76.5, "rewards_train/1-2": 0.4179666340351105, "rewards_train/1-l": -0.4070822596549988, "rewards_train/1-w": 0.970002293586731, "rewards_train/2-2": 0.6678822040557861, "rewards_train/2-w": 0.6774578094482422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 1.3770845532417297, "rewards_train/margins_1": 0.5520356595516205, "rewards_train/margins_2": -0.009575605392456055, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -101.88969421386719, "logps_train/policy_1_l": -86.96415710449219, "logps_train/policy_1_w": -94.07701110839844, "logps_train/policy_2_2": -74.72319030761719, "logps_train/policy_2_w": -128.03350830078125, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.1657177358865738, "rewards_train/1-l": -0.4649701714515686, "rewards_train/1-w": 1.4783339500427246, "rewards_train/2-2": 0.988129734992981, "rewards_train/2-w": 0.3966497778892517, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9433041214942932, "rewards_train/margins_1": 1.3126162141561508, "rewards_train/margins_2": 0.5914799571037292, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -167.721435546875, "logps_train/policy_1_l": -276.6399230957031, "logps_train/policy_1_w": -135.8081817626953, "logps_train/policy_2_2": -132.84716796875, "logps_train/policy_2_w": -173.7906036376953, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.8856673836708069, "rewards_train/1-l": -3.449148654937744, "rewards_train/1-w": 3.028557062149048, "rewards_train/2-2": 1.9168455600738525, "rewards_train/2-w": 1.539689540863037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.477705717086792, "rewards_train/margins_1": 2.142889678478241, "rewards_train/margins_2": 0.37715601921081543, "step": 233 }, { "epoch": 0.7, "learning_rate": 3.876064386435646e-06, "loss": 0.8102, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -230.59742736816406, "logps_train/policy_1_l": -135.39749145507812, "logps_train/policy_1_w": -144.29794311523438, "logps_train/policy_2_2": -170.07098388671875, "logps_train/policy_2_w": -198.65774536132812, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 0.984789252281189, "rewards_train/1-l": -1.5530303716659546, "rewards_train/1-w": 3.3616108894348145, "rewards_train/2-2": 3.6335275173187256, "rewards_train/2-w": 1.4811005592346191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.914641261100769, "rewards_train/margins_1": 2.3768216371536255, "rewards_train/margins_2": 2.1524269580841064, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -184.45602416992188, "logps_train/policy_1_l": -165.68798828125, "logps_train/policy_1_w": -96.83316802978516, "logps_train/policy_2_2": -160.2286376953125, "logps_train/policy_2_w": -110.24105834960938, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.378617525100708, "rewards_train/1-l": -1.6020020246505737, "rewards_train/1-w": 1.5932462215423584, "rewards_train/2-2": 2.0126843452453613, "rewards_train/2-w": 1.236831784248352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.195248246192932, "rewards_train/margins_1": 0.2146286964416504, "rewards_train/margins_2": 0.7758525609970093, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -170.18429565429688, "logps_train/policy_1_l": -161.64849853515625, "logps_train/policy_1_w": -113.69849395751953, "logps_train/policy_2_2": -131.73622131347656, "logps_train/policy_2_w": -158.53822326660156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.1846954822540283, "rewards_train/1-l": -1.2062557935714722, "rewards_train/1-w": 1.9658925533294678, "rewards_train/2-2": 1.8341898918151855, "rewards_train/2-w": 0.8942245244979858, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.17214834690094, "rewards_train/margins_1": 0.7811970710754395, "rewards_train/margins_2": 0.9399653673171997, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -166.1919403076172, "logps_train/policy_1_l": -177.57363891601562, "logps_train/policy_1_w": -156.32159423828125, "logps_train/policy_2_2": -128.05816650390625, "logps_train/policy_2_w": -200.8656768798828, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.215181589126587, "rewards_train/1-l": -1.6376608610153198, "rewards_train/1-w": 2.3043630123138428, "rewards_train/2-2": 2.2504329681396484, "rewards_train/2-w": 1.3730016946792603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9420238733291626, "rewards_train/margins_1": 1.0891814231872559, "rewards_train/margins_2": 0.8774312734603882, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -182.56143188476562, "logps_train/policy_1_l": -188.6984100341797, "logps_train/policy_1_w": -140.01821899414062, "logps_train/policy_2_2": -135.02854919433594, "logps_train/policy_2_w": -198.28424072265625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.8969817161560059, "rewards_train/1-l": -1.6159347295761108, "rewards_train/1-w": 2.2981789112091064, "rewards_train/2-2": 2.3502702713012695, "rewards_train/2-w": 0.2778260111808777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9141136407852173, "rewards_train/margins_1": 1.4011971950531006, "rewards_train/margins_2": 2.072444260120392, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -184.77420043945312, "logps_train/policy_1_l": -266.9966125488281, "logps_train/policy_1_w": -246.4417266845703, "logps_train/policy_2_2": -150.25628662109375, "logps_train/policy_2_w": -312.91448974609375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 2.147581100463867, "rewards_train/1-l": -1.8496631383895874, "rewards_train/1-w": 3.4870781898498535, "rewards_train/2-2": 3.024372100830078, "rewards_train/2-w": 1.0085500478744507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.336741328239441, "rewards_train/margins_1": 1.3394970893859863, "rewards_train/margins_2": 2.0158220529556274, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -152.3155517578125, "logps_train/policy_1_l": -165.97933959960938, "logps_train/policy_1_w": -153.56695556640625, "logps_train/policy_2_2": -116.797607421875, "logps_train/policy_2_w": -210.20547485351562, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.9371939897537231, "rewards_train/1-l": -1.039339542388916, "rewards_train/1-w": 2.1886167526245117, "rewards_train/2-2": 1.6296138763427734, "rewards_train/2-w": 0.8466392159461975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2279562950134277, "rewards_train/margins_1": 1.2514227628707886, "rewards_train/margins_2": 0.7829746603965759, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -102.9551010131836, "logps_train/policy_1_l": -96.9192886352539, "logps_train/policy_1_w": -59.46527099609375, "logps_train/policy_2_2": -81.66326904296875, "logps_train/policy_2_w": -81.58944702148438, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.1685528755187988, "rewards_train/1-l": -1.3525731563568115, "rewards_train/1-w": 1.825347900390625, "rewards_train/2-2": 2.0047671794891357, "rewards_train/2-w": 1.1715247631072998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1779210567474365, "rewards_train/margins_1": 0.6567950248718262, "rewards_train/margins_2": 0.8332424163818359, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -209.05654907226562, "logps_train/policy_1_l": -177.03253173828125, "logps_train/policy_1_w": -166.4898681640625, "logps_train/policy_2_2": -169.62844848632812, "logps_train/policy_2_w": -199.87393188476562, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.346690058708191, "rewards_train/1-l": -1.8657537698745728, "rewards_train/1-w": 2.342029094696045, "rewards_train/2-2": 2.9965298175811768, "rewards_train/2-w": 1.2743260860443115, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.207782864570618, "rewards_train/margins_1": 0.995339035987854, "rewards_train/margins_2": 1.7222037315368652, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -159.93125915527344, "logps_train/policy_1_l": -163.39791870117188, "logps_train/policy_1_w": -91.47901916503906, "logps_train/policy_2_2": -125.98905181884766, "logps_train/policy_2_w": -117.84749603271484, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.8736710548400879, "rewards_train/1-l": -2.375143051147461, "rewards_train/1-w": 1.8089336156845093, "rewards_train/2-2": 2.233907699584961, "rewards_train/2-w": 1.1367346048355103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.18407666683197, "rewards_train/margins_1": 0.9352625608444214, "rewards_train/margins_2": 1.0971730947494507, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -121.86246490478516, "logps_train/policy_1_l": -113.61970520019531, "logps_train/policy_1_w": -74.1271743774414, "logps_train/policy_2_2": -89.41180419921875, "logps_train/policy_2_w": -104.32463836669922, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 0.4723471999168396, "rewards_train/1-l": -1.3658769130706787, "rewards_train/1-w": 2.6396260261535645, "rewards_train/2-2": 1.4596003293991089, "rewards_train/2-w": 1.7628487348556519, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.005502939224243, "rewards_train/margins_1": 2.167278826236725, "rewards_train/margins_2": -0.30324840545654297, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -228.77793884277344, "logps_train/policy_1_l": -155.93748474121094, "logps_train/policy_1_w": -119.0453109741211, "logps_train/policy_2_2": -183.04905700683594, "logps_train/policy_2_w": -169.5567626953125, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.9792370796203613, "rewards_train/1-l": -1.815232753753662, "rewards_train/1-w": 2.9386324882507324, "rewards_train/2-2": 2.6177501678466797, "rewards_train/2-w": 0.9224485754966736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.7538652420043945, "rewards_train/margins_1": 1.959395408630371, "rewards_train/margins_2": 1.695301592350006, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -196.56044006347656, "logps_train/policy_1_l": -256.1375427246094, "logps_train/policy_1_w": -179.05088806152344, "logps_train/policy_2_2": -162.60308837890625, "logps_train/policy_2_w": -209.44403076171875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.6736435294151306, "rewards_train/1-l": -1.752328872680664, "rewards_train/1-w": 2.2230353355407715, "rewards_train/2-2": 1.8178176879882812, "rewards_train/2-w": 1.195440649986267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9753642082214355, "rewards_train/margins_1": 1.5493918061256409, "rewards_train/margins_2": 0.6223770380020142, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -170.84156799316406, "logps_train/policy_1_l": -96.07846069335938, "logps_train/policy_1_w": -80.93251037597656, "logps_train/policy_2_2": -121.36601257324219, "logps_train/policy_2_w": -112.44029998779297, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 0.6705304980278015, "rewards_train/1-l": -0.92268967628479, "rewards_train/1-w": 1.6200305223464966, "rewards_train/2-2": 2.7180867195129395, "rewards_train/2-w": 0.9309697151184082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.5427201986312866, "rewards_train/margins_1": 0.9495000243186951, "rewards_train/margins_2": 1.7871170043945312, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -96.0552978515625, "logps_train/policy_1_l": -91.18216705322266, "logps_train/policy_1_w": -99.89231872558594, "logps_train/policy_2_2": -85.19114685058594, "logps_train/policy_2_w": -117.7508544921875, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.7608768939971924, "rewards_train/1-l": -0.9334514141082764, "rewards_train/1-w": 2.0717058181762695, "rewards_train/2-2": 1.1844006776809692, "rewards_train/2-w": 1.5983517169952393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.005157232284546, "rewards_train/margins_1": 1.3108289241790771, "rewards_train/margins_2": -0.41395103931427, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -96.73158264160156, "logps_train/policy_1_l": -92.11862182617188, "logps_train/policy_1_w": -82.85888671875, "logps_train/policy_2_2": -72.33554077148438, "logps_train/policy_2_w": -110.5191421508789, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -0.09229873865842819, "rewards_train/1-l": -0.7751431465148926, "rewards_train/1-w": 1.8477050065994263, "rewards_train/2-2": 0.9273830056190491, "rewards_train/2-w": 1.0004303455352783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.622848153114319, "rewards_train/margins_1": 1.9400037452578545, "rewards_train/margins_2": -0.07304733991622925, "step": 235 }, { "epoch": 0.71, "learning_rate": 3.855377556903897e-06, "loss": 0.6383, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -214.99485778808594, "logps_train/policy_1_l": -251.10447692871094, "logps_train/policy_1_w": -212.6234893798828, "logps_train/policy_2_2": -176.7522430419922, "logps_train/policy_2_w": -262.9167785644531, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.5659441947937012, "rewards_train/1-l": -2.8944337368011475, "rewards_train/1-w": 3.5845260620117188, "rewards_train/2-2": 2.8964555263519287, "rewards_train/2-w": 1.341914176940918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.478959798812866, "rewards_train/margins_1": 2.0185818672180176, "rewards_train/margins_2": 1.5545413494110107, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -160.02650451660156, "logps_train/policy_1_l": -78.76957702636719, "logps_train/policy_1_w": -93.83783721923828, "logps_train/policy_2_2": -116.50029754638672, "logps_train/policy_2_w": -114.17823791503906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.4356300830841064, "rewards_train/1-l": -0.3812054693698883, "rewards_train/1-w": 1.2814992666244507, "rewards_train/2-2": 2.630439281463623, "rewards_train/2-w": 0.5399395227432251, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.662704735994339, "rewards_train/margins_1": -0.15413081645965576, "rewards_train/margins_2": 2.090499758720398, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -169.26992797851562, "logps_train/policy_1_l": -200.2607421875, "logps_train/policy_1_w": -168.88282775878906, "logps_train/policy_2_2": -133.3288116455078, "logps_train/policy_2_w": -211.73269653320312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.0511324405670166, "rewards_train/1-l": -1.2135730981826782, "rewards_train/1-w": 2.2279279232025146, "rewards_train/2-2": 1.7171190977096558, "rewards_train/2-w": 0.7804415225982666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.441501021385193, "rewards_train/margins_1": 1.176795482635498, "rewards_train/margins_2": 0.9366775751113892, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -179.44793701171875, "logps_train/policy_1_l": -190.8252410888672, "logps_train/policy_1_w": -137.99293518066406, "logps_train/policy_2_2": -149.69076538085938, "logps_train/policy_2_w": -170.0458221435547, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.8567674160003662, "rewards_train/1-l": -1.9764692783355713, "rewards_train/1-w": 2.480393886566162, "rewards_train/2-2": 2.7410802841186523, "rewards_train/2-w": 1.5452215671539307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.456863164901733, "rewards_train/margins_1": 0.6236264705657959, "rewards_train/margins_2": 1.1958587169647217, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -166.80462646484375, "logps_train/policy_1_l": -163.01144409179688, "logps_train/policy_1_w": -175.94940185546875, "logps_train/policy_2_2": -131.51007080078125, "logps_train/policy_2_w": -214.42266845703125, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.5164128541946411, "rewards_train/1-l": -1.5741918087005615, "rewards_train/1-w": 3.0363094806671143, "rewards_train/2-2": 2.7161808013916016, "rewards_train/2-w": 1.5733588933944702, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.610501289367676, "rewards_train/margins_1": 1.5198966264724731, "rewards_train/margins_2": 1.1428219079971313, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -133.58152770996094, "logps_train/policy_1_l": -104.63103485107422, "logps_train/policy_1_w": -80.31224060058594, "logps_train/policy_2_2": -101.96315002441406, "logps_train/policy_2_w": -107.28962707519531, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.49262893199920654, "rewards_train/1-l": -1.1537282466888428, "rewards_train/1-w": 2.014284133911133, "rewards_train/2-2": 1.597825527191162, "rewards_train/2-w": 1.301016926765442, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1680123805999756, "rewards_train/margins_1": 1.5216552019119263, "rewards_train/margins_2": 0.2968086004257202, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -114.53480529785156, "logps_train/policy_1_l": -76.94123077392578, "logps_train/policy_1_w": -32.82745361328125, "logps_train/policy_2_2": -92.8951416015625, "logps_train/policy_2_w": -55.308349609375, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -45.25, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -63.5, "rewards_train/1-2": 0.7527698278427124, "rewards_train/1-l": -1.0527169704437256, "rewards_train/1-w": 1.250457763671875, "rewards_train/2-2": 1.1826542615890503, "rewards_train/2-w": 0.8140866756439209, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3031747341156006, "rewards_train/margins_1": 0.4976879358291626, "rewards_train/margins_2": 0.3685675859451294, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -115.38665771484375, "logps_train/policy_1_l": -101.68324279785156, "logps_train/policy_1_w": -89.05140686035156, "logps_train/policy_2_2": -89.03009033203125, "logps_train/policy_2_w": -125.393798828125, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.371490478515625, "rewards_train/1-l": -0.8224266767501831, "rewards_train/1-w": 1.4761090278625488, "rewards_train/2-2": 2.3118340969085693, "rewards_train/2-w": 0.2606201469898224, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.298535704612732, "rewards_train/margins_1": 0.10461854934692383, "rewards_train/margins_2": 2.051213949918747, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -245.73532104492188, "logps_train/policy_1_l": -276.9446105957031, "logps_train/policy_1_w": -222.81967163085938, "logps_train/policy_2_2": -196.9971923828125, "logps_train/policy_2_w": -301.8526611328125, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -320.0, "rewards_train/1-2": 2.1905298233032227, "rewards_train/1-l": -1.6694614887237549, "rewards_train/1-w": 4.578774452209473, "rewards_train/2-2": 3.1799678802490234, "rewards_train/2-w": 1.7877821922302246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.2482359409332275, "rewards_train/margins_1": 2.38824462890625, "rewards_train/margins_2": 1.3921856880187988, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -168.3470001220703, "logps_train/policy_1_l": -199.89088439941406, "logps_train/policy_1_w": -187.78341674804688, "logps_train/policy_2_2": -130.17562866210938, "logps_train/policy_2_w": -248.49578857421875, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.6840500831604004, "rewards_train/1-l": -1.4437761306762695, "rewards_train/1-w": 2.9419710636138916, "rewards_train/2-2": 2.749624729156494, "rewards_train/2-w": 1.0754201412200928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.385747194290161, "rewards_train/margins_1": 1.2579209804534912, "rewards_train/margins_2": 1.6742045879364014, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -168.60305786132812, "logps_train/policy_1_l": -118.43263244628906, "logps_train/policy_1_w": -131.86395263671875, "logps_train/policy_2_2": -135.99691772460938, "logps_train/policy_2_w": -166.19265747070312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.1443815231323242, "rewards_train/1-l": -0.8845714330673218, "rewards_train/1-w": 2.3237602710723877, "rewards_train/2-2": 1.9221837520599365, "rewards_train/2-w": 1.1994843482971191, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2083317041397095, "rewards_train/margins_1": 1.1793787479400635, "rewards_train/margins_2": 0.7226994037628174, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -157.26031494140625, "logps_train/policy_1_l": -193.25863647460938, "logps_train/policy_1_w": -128.45123291015625, "logps_train/policy_2_2": -118.13892364501953, "logps_train/policy_2_w": -175.24325561523438, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.253852367401123, "rewards_train/1-l": -1.6518398523330688, "rewards_train/1-w": 2.7244091033935547, "rewards_train/2-2": 2.672435998916626, "rewards_train/2-w": 1.4553608894348145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.3762489557266235, "rewards_train/margins_1": 1.4705567359924316, "rewards_train/margins_2": 1.2170751094818115, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -191.93605041503906, "logps_train/policy_1_l": -205.0675811767578, "logps_train/policy_1_w": -181.53155517578125, "logps_train/policy_2_2": -167.09776306152344, "logps_train/policy_2_w": -224.39105224609375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.7001447677612305, "rewards_train/1-l": -1.4551957845687866, "rewards_train/1-w": 2.968719959259033, "rewards_train/2-2": 2.383974552154541, "rewards_train/2-w": 1.3624567985534668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.42391574382782, "rewards_train/margins_1": 1.2685751914978027, "rewards_train/margins_2": 1.0215177536010742, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -156.49716186523438, "logps_train/policy_1_l": -135.79226684570312, "logps_train/policy_1_w": -87.92353820800781, "logps_train/policy_2_2": -127.69927978515625, "logps_train/policy_2_w": -106.72077941894531, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.6549727320671082, "rewards_train/1-l": -1.3547160625457764, "rewards_train/1-w": 2.380302906036377, "rewards_train/2-2": 1.9042904376983643, "rewards_train/2-w": 2.013859748840332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7350189685821533, "rewards_train/margins_1": 1.7253301739692688, "rewards_train/margins_2": -0.10956931114196777, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -138.58969116210938, "logps_train/policy_1_l": -143.63815307617188, "logps_train/policy_1_w": -148.38795471191406, "logps_train/policy_2_2": -111.78036499023438, "logps_train/policy_2_w": -173.31362915039062, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.5814611911773682, "rewards_train/1-l": -1.370847225189209, "rewards_train/1-w": 2.000070571899414, "rewards_train/2-2": 2.403409004211426, "rewards_train/2-w": 0.9805504083633423, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.370917797088623, "rewards_train/margins_1": 0.4186093807220459, "rewards_train/margins_2": 1.4228585958480835, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -131.78488159179688, "logps_train/policy_1_l": -96.05778503417969, "logps_train/policy_1_w": -68.23301696777344, "logps_train/policy_2_2": -102.43069458007812, "logps_train/policy_2_w": -98.01778411865234, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 0.2582300007343292, "rewards_train/1-l": -0.5988665223121643, "rewards_train/1-w": 1.6786518096923828, "rewards_train/2-2": 1.1791962385177612, "rewards_train/2-w": 0.8423622846603394, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.277518332004547, "rewards_train/margins_1": 1.4204218089580536, "rewards_train/margins_2": 0.3368339538574219, "step": 237 }, { "epoch": 0.71, "learning_rate": 3.834558444911978e-06, "loss": 0.7301, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -164.10792541503906, "logps_train/policy_1_l": -185.90155029296875, "logps_train/policy_1_w": -163.447265625, "logps_train/policy_2_2": -115.45939636230469, "logps_train/policy_2_w": -202.412109375, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.6142072677612305, "rewards_train/1-l": -2.3917183876037598, "rewards_train/1-w": 3.0466785430908203, "rewards_train/2-2": 3.237459182739258, "rewards_train/2-w": 1.472851276397705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.43839693069458, "rewards_train/margins_1": 1.4324712753295898, "rewards_train/margins_2": 1.7646079063415527, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -154.43544006347656, "logps_train/policy_1_l": -169.75827026367188, "logps_train/policy_1_w": -150.59487915039062, "logps_train/policy_2_2": -123.10633850097656, "logps_train/policy_2_w": -186.80783081054688, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.0111432075500488, "rewards_train/1-l": -2.1687967777252197, "rewards_train/1-w": 2.308480739593506, "rewards_train/2-2": 2.0940542221069336, "rewards_train/2-w": 0.9207786917686462, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.477277517318726, "rewards_train/margins_1": 1.297337532043457, "rewards_train/margins_2": 1.1732755303382874, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -192.72195434570312, "logps_train/policy_1_l": -200.45135498046875, "logps_train/policy_1_w": -114.91258239746094, "logps_train/policy_2_2": -153.73345947265625, "logps_train/policy_2_w": -140.7920379638672, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.65749192237854, "rewards_train/1-l": -2.192206859588623, "rewards_train/1-w": 1.4978046417236328, "rewards_train/2-2": 3.290717124938965, "rewards_train/2-w": 0.8770453929901123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.690011501312256, "rewards_train/margins_1": -0.15968728065490723, "rewards_train/margins_2": 2.4136717319488525, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -141.93515014648438, "logps_train/policy_1_l": -113.79764556884766, "logps_train/policy_1_w": -76.75346374511719, "logps_train/policy_2_2": -109.30291748046875, "logps_train/policy_2_w": -107.84510803222656, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.8291417360305786, "rewards_train/1-l": -1.9553511142730713, "rewards_train/1-w": 1.6926226615905762, "rewards_train/2-2": 1.7142390012741089, "rewards_train/2-w": 0.71705162525177, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6479737758636475, "rewards_train/margins_1": 0.8634809255599976, "rewards_train/margins_2": 0.9971873760223389, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -116.11145782470703, "logps_train/policy_1_l": -90.87440490722656, "logps_train/policy_1_w": -84.97279357910156, "logps_train/policy_2_2": -91.26958465576172, "logps_train/policy_2_w": -105.40184783935547, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.8790885806083679, "rewards_train/1-l": -1.065956711769104, "rewards_train/1-w": 2.2855334281921387, "rewards_train/2-2": 1.7523386478424072, "rewards_train/2-w": 1.5191900730133057, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.3514901399612427, "rewards_train/margins_1": 1.4064448475837708, "rewards_train/margins_2": 0.23314857482910156, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -278.4839172363281, "logps_train/policy_1_l": -301.3170471191406, "logps_train/policy_1_w": -241.60183715820312, "logps_train/policy_2_2": -221.59359741210938, "logps_train/policy_2_w": -308.401611328125, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 0.27973347902297974, "rewards_train/1-l": -2.462954521179199, "rewards_train/1-w": 4.127315521240234, "rewards_train/2-2": 2.412515640258789, "rewards_train/2-w": 1.666085958480835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.590270042419434, "rewards_train/margins_1": 3.8475820422172546, "rewards_train/margins_2": 0.7464296817779541, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -143.0066680908203, "logps_train/policy_1_l": -135.1790771484375, "logps_train/policy_1_w": -117.2842025756836, "logps_train/policy_2_2": -123.24756622314453, "logps_train/policy_2_w": -143.0203857421875, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.5837082862854004, "rewards_train/1-l": -1.2347042560577393, "rewards_train/1-w": 2.7403290271759033, "rewards_train/2-2": 2.296337604522705, "rewards_train/2-w": 1.7510859966278076, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9750332832336426, "rewards_train/margins_1": 1.156620740890503, "rewards_train/margins_2": 0.5452516078948975, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -82.84292602539062, "logps_train/policy_1_l": -74.98855590820312, "logps_train/policy_1_w": -72.63095092773438, "logps_train/policy_2_2": -68.3266372680664, "logps_train/policy_2_w": -90.11732482910156, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.9125825762748718, "rewards_train/1-l": -0.4937779903411865, "rewards_train/1-w": 1.3275301456451416, "rewards_train/2-2": 1.5327657461166382, "rewards_train/2-w": 0.7499861717224121, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8213081359863281, "rewards_train/margins_1": 0.4149475693702698, "rewards_train/margins_2": 0.7827795743942261, "step": 238 }, { "epoch": 0.72, "logps_train/policy_1_2": -140.4046630859375, "logps_train/policy_1_l": -129.8705596923828, "logps_train/policy_1_w": -123.37362670898438, "logps_train/policy_2_2": -120.60040283203125, "logps_train/policy_2_w": -147.720703125, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.7009401321411133, "rewards_train/1-l": -1.1825644969940186, "rewards_train/1-w": 2.3829503059387207, "rewards_train/2-2": 2.4282407760620117, "rewards_train/2-w": 1.563084363937378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5655148029327393, "rewards_train/margins_1": 0.6820101737976074, "rewards_train/margins_2": 0.8651564121246338, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -150.52915954589844, "logps_train/policy_1_l": -141.60232543945312, "logps_train/policy_1_w": -103.5606689453125, "logps_train/policy_2_2": -127.56843566894531, "logps_train/policy_2_w": -121.94273376464844, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.6681784987449646, "rewards_train/1-l": -1.2188254594802856, "rewards_train/1-w": 1.8376836776733398, "rewards_train/2-2": 1.3347580432891846, "rewards_train/2-w": 1.2252585887908936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.0565091371536255, "rewards_train/margins_1": 1.1695051789283752, "rewards_train/margins_2": 0.10949945449829102, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -226.51284790039062, "logps_train/policy_1_l": -234.28213500976562, "logps_train/policy_1_w": -204.78781127929688, "logps_train/policy_2_2": -178.9801025390625, "logps_train/policy_2_w": -245.46104431152344, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.5846531391143799, "rewards_train/1-l": -2.216493844985962, "rewards_train/1-w": 2.9688758850097656, "rewards_train/2-2": 3.0894885063171387, "rewards_train/2-w": 1.3320201635360718, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.1853697299957275, "rewards_train/margins_1": 1.3842227458953857, "rewards_train/margins_2": 1.757468342781067, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -209.43408203125, "logps_train/policy_1_l": -163.76409912109375, "logps_train/policy_1_w": -147.93698120117188, "logps_train/policy_2_2": -169.19822692871094, "logps_train/policy_2_w": -191.93434143066406, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.4065933227539062, "rewards_train/1-l": -2.1300230026245117, "rewards_train/1-w": 3.462552547454834, "rewards_train/2-2": 2.873927593231201, "rewards_train/2-w": 1.912815809249878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.592575550079346, "rewards_train/margins_1": 2.0559592247009277, "rewards_train/margins_2": 0.9611117839813232, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -77.15203857421875, "logps_train/policy_1_l": -138.76971435546875, "logps_train/policy_1_w": -102.586669921875, "logps_train/policy_2_2": -60.971763610839844, "logps_train/policy_2_w": -123.70671081542969, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.045734167098999, "rewards_train/1-l": -1.5855658054351807, "rewards_train/1-w": 1.8796141147613525, "rewards_train/2-2": 1.3942298889160156, "rewards_train/2-w": 1.085578441619873, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.465179920196533, "rewards_train/margins_1": 0.8338799476623535, "rewards_train/margins_2": 0.3086514472961426, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -168.20159912109375, "logps_train/policy_1_l": -136.56793212890625, "logps_train/policy_1_w": -133.54185485839844, "logps_train/policy_2_2": -133.44390869140625, "logps_train/policy_2_w": -159.00030517578125, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.3173396587371826, "rewards_train/1-l": -0.4034733474254608, "rewards_train/1-w": 1.348549246788025, "rewards_train/2-2": 2.357171058654785, "rewards_train/2-w": 0.5324881672859192, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.7520225942134857, "rewards_train/margins_1": 0.031209588050842285, "rewards_train/margins_2": 1.824682891368866, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -158.87449645996094, "logps_train/policy_1_l": -136.9566650390625, "logps_train/policy_1_w": -117.02888488769531, "logps_train/policy_2_2": -125.13880920410156, "logps_train/policy_2_w": -147.15255737304688, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.257081985473633, "rewards_train/1-l": -1.835510015487671, "rewards_train/1-w": 2.315861701965332, "rewards_train/2-2": 3.497056484222412, "rewards_train/2-w": 1.3554469347000122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.151371717453003, "rewards_train/margins_1": 0.05877971649169922, "rewards_train/margins_2": 2.1416095495224, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -162.80523681640625, "logps_train/policy_1_l": -168.37332153320312, "logps_train/policy_1_w": -168.32655334472656, "logps_train/policy_2_2": -139.91421508789062, "logps_train/policy_2_w": -212.18507385253906, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": 2.221038579940796, "rewards_train/1-l": -1.6547163724899292, "rewards_train/1-w": 3.179844379425049, "rewards_train/2-2": 3.068735122680664, "rewards_train/2-w": 1.8932114839553833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.834560751914978, "rewards_train/margins_1": 0.9588057994842529, "rewards_train/margins_2": 1.1755236387252808, "step": 239 }, { "epoch": 0.72, "learning_rate": 3.8136090823685156e-06, "loss": 0.7453, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -72.98091888427734, "logps_train/policy_1_l": -76.6856689453125, "logps_train/policy_1_w": -51.556331634521484, "logps_train/policy_2_2": -53.42085266113281, "logps_train/policy_2_w": -66.80206298828125, "logps_train/ref_1_2": -77.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -63.25, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.3891148269176483, "rewards_train/1-l": -0.5115361213684082, "rewards_train/1-w": 1.258819818496704, "rewards_train/2-2": 0.9747604131698608, "rewards_train/2-w": 1.1676454544067383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7703559398651123, "rewards_train/margins_1": 0.8697049915790558, "rewards_train/margins_2": -0.19288504123687744, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -88.35932922363281, "logps_train/policy_1_l": -87.54345703125, "logps_train/policy_1_w": -103.37406921386719, "logps_train/policy_2_2": -63.098548889160156, "logps_train/policy_2_w": -135.4947052001953, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.6835983395576477, "rewards_train/1-l": -0.5479009747505188, "rewards_train/1-w": 1.860445261001587, "rewards_train/2-2": 1.1944423913955688, "rewards_train/2-w": 0.9361251592636108, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4083462357521057, "rewards_train/margins_1": 1.1768469214439392, "rewards_train/margins_2": 0.258317232131958, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -111.69367980957031, "logps_train/policy_1_l": -98.14220428466797, "logps_train/policy_1_w": -86.05021667480469, "logps_train/policy_2_2": -83.56474304199219, "logps_train/policy_2_w": -114.32772827148438, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 0.804069459438324, "rewards_train/1-l": -1.2608997821807861, "rewards_train/1-w": 2.6231026649475098, "rewards_train/2-2": 1.629658579826355, "rewards_train/2-w": 1.3281642198562622, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.884002447128296, "rewards_train/margins_1": 1.8190332055091858, "rewards_train/margins_2": 0.3014943599700928, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -175.72494506835938, "logps_train/policy_1_l": -244.8386993408203, "logps_train/policy_1_w": -173.61216735839844, "logps_train/policy_2_2": -138.34060668945312, "logps_train/policy_2_w": -237.455322265625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.4634432792663574, "rewards_train/1-l": -2.0698094367980957, "rewards_train/1-w": 4.298157691955566, "rewards_train/2-2": 2.5854721069335938, "rewards_train/2-w": 2.145092010498047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.367967128753662, "rewards_train/margins_1": 2.834714412689209, "rewards_train/margins_2": 0.4403800964355469, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -120.35758972167969, "logps_train/policy_1_l": -85.87293243408203, "logps_train/policy_1_w": -69.95417785644531, "logps_train/policy_2_2": -98.18721008300781, "logps_train/policy_2_w": -85.79893493652344, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 1.3650226593017578, "rewards_train/1-l": -1.0505746603012085, "rewards_train/1-w": 1.682316541671753, "rewards_train/2-2": 2.040947437286377, "rewards_train/2-w": 1.2779196500778198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7328912019729614, "rewards_train/margins_1": 0.3172938823699951, "rewards_train/margins_2": 0.7630277872085571, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -157.53909301757812, "logps_train/policy_1_l": -156.956298828125, "logps_train/policy_1_w": -121.87586212158203, "logps_train/policy_2_2": -131.255126953125, "logps_train/policy_2_w": -143.13206481933594, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2398399114608765, "rewards_train/1-l": -1.0378183126449585, "rewards_train/1-w": 1.6952261924743652, "rewards_train/2-2": 2.1432366371154785, "rewards_train/2-w": 0.9196063876152039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7330445051193237, "rewards_train/margins_1": 0.45538628101348877, "rewards_train/margins_2": 1.2236302495002747, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -229.9168243408203, "logps_train/policy_1_l": -315.8520202636719, "logps_train/policy_1_w": -128.28578186035156, "logps_train/policy_2_2": -186.6859893798828, "logps_train/policy_2_w": -166.72898864746094, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -292.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.7348811626434326, "rewards_train/1-l": -2.5215306282043457, "rewards_train/1-w": 2.287827491760254, "rewards_train/2-2": 2.059525728225708, "rewards_train/2-w": 1.66460120677948, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.8093581199646, "rewards_train/margins_1": 1.5529463291168213, "rewards_train/margins_2": 0.394924521446228, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -112.88214111328125, "logps_train/policy_1_l": -101.818603515625, "logps_train/policy_1_w": -118.45269775390625, "logps_train/policy_2_2": -91.00286865234375, "logps_train/policy_2_w": -151.1613311767578, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.0719414949417114, "rewards_train/1-l": -0.930688738822937, "rewards_train/1-w": 2.4320740699768066, "rewards_train/2-2": 1.8434624671936035, "rewards_train/2-w": 1.6479299068450928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3627628087997437, "rewards_train/margins_1": 1.3601325750350952, "rewards_train/margins_2": 0.19553256034851074, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -236.1190185546875, "logps_train/policy_1_l": -190.1028289794922, "logps_train/policy_1_w": -183.8807373046875, "logps_train/policy_2_2": -197.8343963623047, "logps_train/policy_2_w": -223.3209991455078, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.16622257232666, "rewards_train/1-l": -1.2087197303771973, "rewards_train/1-w": 3.182239532470703, "rewards_train/2-2": 3.4259350299835205, "rewards_train/2-w": 1.6475883722305298, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.3909592628479, "rewards_train/margins_1": 1.016016960144043, "rewards_train/margins_2": 1.7783466577529907, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -177.31704711914062, "logps_train/policy_1_l": -165.60726928710938, "logps_train/policy_1_w": -94.86923217773438, "logps_train/policy_2_2": -150.14584350585938, "logps_train/policy_2_w": -123.63672637939453, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.568296194076538, "rewards_train/1-l": -1.5720794200897217, "rewards_train/1-w": 1.9529205560684204, "rewards_train/2-2": 2.6197900772094727, "rewards_train/2-w": 1.4332023859024048, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.524999976158142, "rewards_train/margins_1": 0.3846243619918823, "rewards_train/margins_2": 1.1865876913070679, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -109.76991271972656, "logps_train/policy_1_l": -166.49887084960938, "logps_train/policy_1_w": -124.04920196533203, "logps_train/policy_2_2": -90.19682312011719, "logps_train/policy_2_w": -152.64138793945312, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.4526959657669067, "rewards_train/1-l": -1.7432472705841064, "rewards_train/1-w": 2.3263301849365234, "rewards_train/2-2": 1.899848222732544, "rewards_train/2-w": 1.2327364683151245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.06957745552063, "rewards_train/margins_1": 0.8736342191696167, "rewards_train/margins_2": 0.6671117544174194, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -147.79762268066406, "logps_train/policy_1_l": -121.89726257324219, "logps_train/policy_1_w": -104.15693664550781, "logps_train/policy_2_2": -115.0685806274414, "logps_train/policy_2_w": -141.01663208007812, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.9983625411987305, "rewards_train/1-l": -1.1444132328033447, "rewards_train/1-w": 1.8249311447143555, "rewards_train/2-2": 2.7712666988372803, "rewards_train/2-w": 0.4780241847038269, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9693443775177, "rewards_train/margins_1": -0.173431396484375, "rewards_train/margins_2": 2.2932425141334534, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -114.20173645019531, "logps_train/policy_1_l": -145.23519897460938, "logps_train/policy_1_w": -97.82080841064453, "logps_train/policy_2_2": -91.53643798828125, "logps_train/policy_2_w": -121.70852661132812, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9696694016456604, "rewards_train/1-l": -1.7830902338027954, "rewards_train/1-w": 2.1282708644866943, "rewards_train/2-2": 1.8555352687835693, "rewards_train/2-w": 1.1053194999694824, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9113610982894897, "rewards_train/margins_1": 1.158601462841034, "rewards_train/margins_2": 0.7502157688140869, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -229.1521759033203, "logps_train/policy_1_l": -212.26513671875, "logps_train/policy_1_w": -153.81298828125, "logps_train/policy_2_2": -192.17771911621094, "logps_train/policy_2_w": -192.53021240234375, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.053044557571411, "rewards_train/1-l": -0.6676278710365295, "rewards_train/1-w": 2.4807140827178955, "rewards_train/2-2": 2.9304709434509277, "rewards_train/2-w": 1.373345136642456, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.148341953754425, "rewards_train/margins_1": 0.4276695251464844, "rewards_train/margins_2": 1.5571258068084717, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -91.21009826660156, "logps_train/policy_1_l": -100.35328674316406, "logps_train/policy_1_w": -118.71683502197266, "logps_train/policy_2_2": -70.98155975341797, "logps_train/policy_2_w": -142.8507080078125, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.343052864074707, "rewards_train/1-l": -0.8711690902709961, "rewards_train/1-w": 1.9767539501190186, "rewards_train/2-2": 2.0018439292907715, "rewards_train/2-w": 1.0096555948257446, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8479230403900146, "rewards_train/margins_1": 0.6337010860443115, "rewards_train/margins_2": 0.9921883344650269, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -129.058837890625, "logps_train/policy_1_l": -265.515625, "logps_train/policy_1_w": -94.5255126953125, "logps_train/policy_2_2": -99.59129333496094, "logps_train/policy_2_w": -121.07911682128906, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.6347407102584839, "rewards_train/1-l": -2.6304681301116943, "rewards_train/1-w": 1.617761254310608, "rewards_train/2-2": 2.408839702606201, "rewards_train/2-w": 1.1217763423919678, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.248229384422302, "rewards_train/margins_1": -0.016979455947875977, "rewards_train/margins_2": 1.2870633602142334, "step": 241 }, { "epoch": 0.72, "learning_rate": 3.792531513894365e-06, "loss": 0.7767, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -170.64730834960938, "logps_train/policy_1_l": -167.35838317871094, "logps_train/policy_1_w": -134.65853881835938, "logps_train/policy_2_2": -141.42697143554688, "logps_train/policy_2_w": -170.79421997070312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.083706021308899, "rewards_train/1-l": -1.1811515092849731, "rewards_train/1-w": 2.6583638191223145, "rewards_train/2-2": 2.076052665710449, "rewards_train/2-w": 1.4502654075622559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8395153284072876, "rewards_train/margins_1": 1.5746577978134155, "rewards_train/margins_2": 0.6257872581481934, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -139.300048828125, "logps_train/policy_1_l": -123.52767944335938, "logps_train/policy_1_w": -97.6034164428711, "logps_train/policy_2_2": -107.48207092285156, "logps_train/policy_2_w": -116.11573791503906, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.4426524639129639, "rewards_train/1-l": -1.0443700551986694, "rewards_train/1-w": 2.1349706649780273, "rewards_train/2-2": 2.2019877433776855, "rewards_train/2-w": 1.627000331878662, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1793407201766968, "rewards_train/margins_1": 0.6923182010650635, "rewards_train/margins_2": 0.5749874114990234, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -138.47573852539062, "logps_train/policy_1_l": -108.80888366699219, "logps_train/policy_1_w": -122.57694244384766, "logps_train/policy_2_2": -109.99197387695312, "logps_train/policy_2_w": -144.96456909179688, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.6524266600608826, "rewards_train/1-l": -0.7605761289596558, "rewards_train/1-w": 0.8219930529594421, "rewards_train/2-2": 1.334787368774414, "rewards_train/2-w": 0.13791795074939728, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.582569181919098, "rewards_train/margins_1": 0.16956639289855957, "rewards_train/margins_2": 1.1968694180250168, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -205.07078552246094, "logps_train/policy_1_l": -166.51309204101562, "logps_train/policy_1_w": -106.2669677734375, "logps_train/policy_2_2": -167.45703125, "logps_train/policy_2_w": -139.92462158203125, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0384284257888794, "rewards_train/1-l": -1.0606842041015625, "rewards_train/1-w": 2.58892822265625, "rewards_train/2-2": 2.0391602516174316, "rewards_train/2-w": 1.5325372219085693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6496124267578125, "rewards_train/margins_1": 1.5504997968673706, "rewards_train/margins_2": 0.5066230297088623, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -120.38304138183594, "logps_train/policy_1_l": -98.0513916015625, "logps_train/policy_1_w": -112.06967163085938, "logps_train/policy_2_2": -95.08135223388672, "logps_train/policy_2_w": -142.48611450195312, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.6351335644721985, "rewards_train/1-l": -0.5626100301742554, "rewards_train/1-w": 2.3133456707000732, "rewards_train/2-2": 1.5621769428253174, "rewards_train/2-w": 1.4513882398605347, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8759557008743286, "rewards_train/margins_1": 1.6782121062278748, "rewards_train/margins_2": 0.11078870296478271, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -113.37397003173828, "logps_train/policy_1_l": -160.79922485351562, "logps_train/policy_1_w": -120.17338562011719, "logps_train/policy_2_2": -97.82630920410156, "logps_train/policy_2_w": -146.9019012451172, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.550102949142456, "rewards_train/1-l": -1.203749656677246, "rewards_train/1-w": 2.1100053787231445, "rewards_train/2-2": 1.9478377103805542, "rewards_train/2-w": 1.276997447013855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3137550354003906, "rewards_train/margins_1": 0.5599024295806885, "rewards_train/margins_2": 0.6708402633666992, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -308.2216796875, "logps_train/policy_1_l": -270.6365661621094, "logps_train/policy_1_w": -201.55892944335938, "logps_train/policy_2_2": -245.79904174804688, "logps_train/policy_2_w": -257.27130126953125, "logps_train/ref_1_2": -316.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -282.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 0.8647453784942627, "rewards_train/1-l": -2.9013519287109375, "rewards_train/1-w": 3.075356960296631, "rewards_train/2-2": 3.657498836517334, "rewards_train/2-w": 1.0806843042373657, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.976708889007568, "rewards_train/margins_1": 2.210611581802368, "rewards_train/margins_2": 2.5768145322799683, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -112.4345474243164, "logps_train/policy_1_l": -107.22000122070312, "logps_train/policy_1_w": -87.16099548339844, "logps_train/policy_2_2": -89.62908172607422, "logps_train/policy_2_w": -103.19424438476562, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 1.2643579244613647, "rewards_train/1-l": -1.255203127861023, "rewards_train/1-w": 1.8323383331298828, "rewards_train/2-2": 2.279865026473999, "rewards_train/2-w": 1.2415127754211426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0875414609909058, "rewards_train/margins_1": 0.5679804086685181, "rewards_train/margins_2": 1.0383522510528564, "step": 242 }, { "epoch": 0.73, "logps_train/policy_1_2": -169.56134033203125, "logps_train/policy_1_l": -153.54452514648438, "logps_train/policy_1_w": -150.69012451171875, "logps_train/policy_2_2": -141.79598999023438, "logps_train/policy_2_w": -179.07406616210938, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5292179584503174, "rewards_train/1-l": -1.0919512510299683, "rewards_train/1-w": 3.2637996673583984, "rewards_train/2-2": 2.332609176635742, "rewards_train/2-w": 2.3097805976867676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.355750918388367, "rewards_train/margins_1": 1.734581708908081, "rewards_train/margins_2": 0.02282857894897461, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -107.53925323486328, "logps_train/policy_1_l": -121.25984191894531, "logps_train/policy_1_w": -55.75836181640625, "logps_train/policy_2_2": -86.91992950439453, "logps_train/policy_2_w": -75.84690856933594, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -70.5, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.6499810814857483, "rewards_train/1-l": -1.3865317106246948, "rewards_train/1-w": 1.4868593215942383, "rewards_train/2-2": 1.3962886333465576, "rewards_train/2-w": 1.013991117477417, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.873391032218933, "rewards_train/margins_1": 0.83687824010849, "rewards_train/margins_2": 0.3822975158691406, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -252.31610107421875, "logps_train/policy_1_l": -227.6387939453125, "logps_train/policy_1_w": -162.58009338378906, "logps_train/policy_2_2": -201.0606231689453, "logps_train/policy_2_w": -206.32521057128906, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.9371392726898193, "rewards_train/1-l": -1.4279423952102661, "rewards_train/1-w": 2.6527326107025146, "rewards_train/2-2": 3.6814382076263428, "rewards_train/2-w": 1.5241198539733887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.080675005912781, "rewards_train/margins_1": 0.7155933380126953, "rewards_train/margins_2": 2.157318353652954, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -101.04989624023438, "logps_train/policy_1_l": -129.6446533203125, "logps_train/policy_1_w": -109.41285705566406, "logps_train/policy_2_2": -88.70124053955078, "logps_train/policy_2_w": -140.77737426757812, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.3610260486602783, "rewards_train/1-l": -1.115320086479187, "rewards_train/1-w": 2.3645737171173096, "rewards_train/2-2": 1.6290950775146484, "rewards_train/2-w": 1.0167927742004395, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4798938035964966, "rewards_train/margins_1": 1.0035476684570312, "rewards_train/margins_2": 0.612302303314209, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -123.60558319091797, "logps_train/policy_1_l": -144.65048217773438, "logps_train/policy_1_w": -105.64816284179688, "logps_train/policy_2_2": -104.11104583740234, "logps_train/policy_2_w": -122.65599060058594, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.2882702350616455, "rewards_train/1-l": -1.4761568307876587, "rewards_train/1-w": 1.718605875968933, "rewards_train/2-2": 1.860379695892334, "rewards_train/2-w": 1.0457285642623901, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.194762706756592, "rewards_train/margins_1": 0.4303356409072876, "rewards_train/margins_2": 0.8146511316299438, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -143.1558837890625, "logps_train/policy_1_l": -112.42500305175781, "logps_train/policy_1_w": -104.20153045654297, "logps_train/policy_2_2": -121.37506866455078, "logps_train/policy_2_w": -132.61807250976562, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.7867553234100342, "rewards_train/1-l": 0.2168748676776886, "rewards_train/1-w": 1.7392218112945557, "rewards_train/2-2": 2.4984304904937744, "rewards_train/2-w": 1.2303798198699951, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.522346943616867, "rewards_train/margins_1": -0.047533512115478516, "rewards_train/margins_2": 1.2680506706237793, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -198.17506408691406, "logps_train/policy_1_l": -182.64862060546875, "logps_train/policy_1_w": -192.88442993164062, "logps_train/policy_2_2": -176.67755126953125, "logps_train/policy_2_w": -221.64224243164062, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.8387430906295776, "rewards_train/1-l": -0.9367378950119019, "rewards_train/1-w": 2.7631211280822754, "rewards_train/2-2": 2.5759942531585693, "rewards_train/2-w": 1.7139016389846802, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6998590230941772, "rewards_train/margins_1": 0.9243780374526978, "rewards_train/margins_2": 0.8620926141738892, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -204.5692596435547, "logps_train/policy_1_l": -173.3656768798828, "logps_train/policy_1_w": -139.23049926757812, "logps_train/policy_2_2": -168.99122619628906, "logps_train/policy_2_w": -170.47178649902344, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.8711988925933838, "rewards_train/1-l": -1.1742634773254395, "rewards_train/1-w": 2.119137763977051, "rewards_train/2-2": 3.0446274280548096, "rewards_train/2-w": 1.3590717315673828, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2934012413024902, "rewards_train/margins_1": 0.247938871383667, "rewards_train/margins_2": 1.6855556964874268, "step": 243 }, { "epoch": 0.73, "learning_rate": 3.7713277966230514e-06, "loss": 0.8186, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -175.34730529785156, "logps_train/policy_1_l": -192.67013549804688, "logps_train/policy_1_w": -113.46166229248047, "logps_train/policy_2_2": -148.6655731201172, "logps_train/policy_2_w": -133.66493225097656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3668315410614014, "rewards_train/1-l": -1.7756054401397705, "rewards_train/1-w": 2.2413337230682373, "rewards_train/2-2": 2.166254997253418, "rewards_train/2-w": 1.6389756202697754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.016939163208008, "rewards_train/margins_1": 0.8745021820068359, "rewards_train/margins_2": 0.5272793769836426, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -141.77102661132812, "logps_train/policy_1_l": -232.82940673828125, "logps_train/policy_1_w": -137.9794158935547, "logps_train/policy_2_2": -121.2497787475586, "logps_train/policy_2_w": -165.94766235351562, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.9666461944580078, "rewards_train/1-l": -2.3199515342712402, "rewards_train/1-w": 2.2181711196899414, "rewards_train/2-2": 2.3984594345092773, "rewards_train/2-w": 1.3193944692611694, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.538122653961182, "rewards_train/margins_1": 0.2515249252319336, "rewards_train/margins_2": 1.079064965248108, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -123.85470581054688, "logps_train/policy_1_l": -180.1348419189453, "logps_train/policy_1_w": -118.53497314453125, "logps_train/policy_2_2": -103.96768951416016, "logps_train/policy_2_w": -138.12411499023438, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.8679965734481812, "rewards_train/1-l": -1.2740315198898315, "rewards_train/1-w": 2.0230648517608643, "rewards_train/2-2": 1.3442466259002686, "rewards_train/2-w": 1.6094629764556885, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.297096371650696, "rewards_train/margins_1": 1.155068278312683, "rewards_train/margins_2": -0.2652163505554199, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -149.7583465576172, "logps_train/policy_1_l": -172.77398681640625, "logps_train/policy_1_w": -97.27099609375, "logps_train/policy_2_2": -119.59077453613281, "logps_train/policy_2_w": -130.37734985351562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.4460408091545105, "rewards_train/1-l": -2.1695849895477295, "rewards_train/1-w": 2.1043453216552734, "rewards_train/2-2": 1.5393595695495605, "rewards_train/2-w": 1.0872652530670166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.273930311203003, "rewards_train/margins_1": 1.658304512500763, "rewards_train/margins_2": 0.45209431648254395, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -92.76560974121094, "logps_train/policy_1_l": -79.0547103881836, "logps_train/policy_1_w": -86.6932144165039, "logps_train/policy_2_2": -76.17132568359375, "logps_train/policy_2_w": -106.0966796875, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8320327997207642, "rewards_train/1-l": -0.5887230038642883, "rewards_train/1-w": 1.680971622467041, "rewards_train/2-2": 1.5180238485336304, "rewards_train/2-w": 1.0028324127197266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.2696946263313293, "rewards_train/margins_1": 0.8489388227462769, "rewards_train/margins_2": 0.5151914358139038, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -195.7194061279297, "logps_train/policy_1_l": -236.18359375, "logps_train/policy_1_w": -190.47213745117188, "logps_train/policy_2_2": -162.4819793701172, "logps_train/policy_2_w": -246.10377502441406, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -215.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.4155590534210205, "rewards_train/1-l": -1.6058578491210938, "rewards_train/1-w": 2.490285873413086, "rewards_train/2-2": 2.476802349090576, "rewards_train/2-w": 0.8208730816841125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.09614372253418, "rewards_train/margins_1": 1.0747268199920654, "rewards_train/margins_2": 1.6559292674064636, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -156.7838134765625, "logps_train/policy_1_l": -201.06292724609375, "logps_train/policy_1_w": -98.26393127441406, "logps_train/policy_2_2": -141.12155151367188, "logps_train/policy_2_w": -112.70899200439453, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.6278676986694336, "rewards_train/1-l": -2.3492624759674072, "rewards_train/1-w": 1.6361074447631836, "rewards_train/2-2": 2.1753439903259277, "rewards_train/2-w": 1.4064438343048096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.985369920730591, "rewards_train/margins_1": 0.00823974609375, "rewards_train/margins_2": 0.7689001560211182, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -174.2032928466797, "logps_train/policy_1_l": -184.4548797607422, "logps_train/policy_1_w": -214.43728637695312, "logps_train/policy_2_2": -147.04342651367188, "logps_train/policy_2_w": -276.0075378417969, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -247.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.5093584060668945, "rewards_train/1-l": -1.0925577878952026, "rewards_train/1-w": 3.2828338146209717, "rewards_train/2-2": 2.3464388847351074, "rewards_train/2-w": 1.1008110046386719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.375391602516174, "rewards_train/margins_1": 1.7734754085540771, "rewards_train/margins_2": 1.2456278800964355, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -208.72549438476562, "logps_train/policy_1_l": -172.70751953125, "logps_train/policy_1_w": -136.27499389648438, "logps_train/policy_2_2": -175.24887084960938, "logps_train/policy_2_w": -174.88246154785156, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.2837002277374268, "rewards_train/1-l": -0.8104007244110107, "rewards_train/1-w": 2.1240625381469727, "rewards_train/2-2": 3.2688636779785156, "rewards_train/2-w": 1.0883163213729858, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9344632625579834, "rewards_train/margins_1": -0.1596376895904541, "rewards_train/margins_2": 2.18054735660553, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -137.66519165039062, "logps_train/policy_1_l": -212.41641235351562, "logps_train/policy_1_w": -128.09071350097656, "logps_train/policy_2_2": -105.41893005371094, "logps_train/policy_2_w": -175.2616424560547, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.302230954170227, "rewards_train/1-l": -2.6267974376678467, "rewards_train/1-w": 2.5077247619628906, "rewards_train/2-2": 2.2112317085266113, "rewards_train/2-w": 1.1972730159759521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.134522199630737, "rewards_train/margins_1": 1.2054938077926636, "rewards_train/margins_2": 1.0139586925506592, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -173.49082946777344, "logps_train/policy_1_l": -266.099853515625, "logps_train/policy_1_w": -148.75650024414062, "logps_train/policy_2_2": -145.9529571533203, "logps_train/policy_2_w": -182.96437072753906, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -241.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.199355125427246, "rewards_train/1-l": -2.5113515853881836, "rewards_train/1-w": 2.2384135723114014, "rewards_train/2-2": 2.094938278198242, "rewards_train/2-w": 1.517235279083252, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.749765157699585, "rewards_train/margins_1": 1.0390584468841553, "rewards_train/margins_2": 0.5777029991149902, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -134.48373413085938, "logps_train/policy_1_l": -143.84149169921875, "logps_train/policy_1_w": -94.38658142089844, "logps_train/policy_2_2": -102.69224548339844, "logps_train/policy_2_w": -135.2803192138672, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.2305316925048828, "rewards_train/1-l": -1.3958678245544434, "rewards_train/1-w": 1.9351701736450195, "rewards_train/2-2": 2.3409318923950195, "rewards_train/2-w": 0.8297813534736633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.331037998199463, "rewards_train/margins_1": 0.7046384811401367, "rewards_train/margins_2": 1.5111505389213562, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -165.5558624267578, "logps_train/policy_1_l": -117.45679473876953, "logps_train/policy_1_w": -133.56185913085938, "logps_train/policy_2_2": -132.38565063476562, "logps_train/policy_2_w": -168.59573364257812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.0549604892730713, "rewards_train/1-l": -1.304078221321106, "rewards_train/1-w": 2.971548557281494, "rewards_train/2-2": 3.266122817993164, "rewards_train/2-w": 1.9998009204864502, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.2756267786026, "rewards_train/margins_1": 0.9165880680084229, "rewards_train/margins_2": 1.2663218975067139, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -161.568359375, "logps_train/policy_1_l": -147.94891357421875, "logps_train/policy_1_w": -140.2831268310547, "logps_train/policy_2_2": -129.8487548828125, "logps_train/policy_2_w": -169.32135009765625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.0806643962860107, "rewards_train/1-l": -1.2028995752334595, "rewards_train/1-w": 2.8058664798736572, "rewards_train/2-2": 2.0205938816070557, "rewards_train/2-w": 1.6414982080459595, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.008766055107117, "rewards_train/margins_1": 1.7252020835876465, "rewards_train/margins_2": 0.3790956735610962, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -123.26750183105469, "logps_train/policy_1_l": -137.87583923339844, "logps_train/policy_1_w": -94.16763305664062, "logps_train/policy_2_2": -97.44966125488281, "logps_train/policy_2_w": -108.98310852050781, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 0.8205149173736572, "rewards_train/1-l": -1.400865077972412, "rewards_train/1-w": 1.6711276769638062, "rewards_train/2-2": 1.964604377746582, "rewards_train/2-w": 1.3005173206329346, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0719927549362183, "rewards_train/margins_1": 0.8506127595901489, "rewards_train/margins_2": 0.6640870571136475, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -252.65380859375, "logps_train/policy_1_l": -232.4350128173828, "logps_train/policy_1_w": -229.2885284423828, "logps_train/policy_2_2": -208.1583251953125, "logps_train/policy_2_w": -294.9693603515625, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -258.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -306.0, "rewards_train/1-2": 1.7025880813598633, "rewards_train/1-l": -1.498970627784729, "rewards_train/1-w": 2.933648109436035, "rewards_train/2-2": 3.478698253631592, "rewards_train/2-w": 1.0405645370483398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.432618737220764, "rewards_train/margins_1": 1.2310600280761719, "rewards_train/margins_2": 2.438133716583252, "step": 245 }, { "epoch": 0.74, "learning_rate": 3.7500000000000005e-06, "loss": 0.8615, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -259.24224853515625, "logps_train/policy_1_l": -162.24325561523438, "logps_train/policy_1_w": -177.50758361816406, "logps_train/policy_2_2": -216.6970672607422, "logps_train/policy_2_w": -223.59356689453125, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 0.23358631134033203, "rewards_train/1-l": -1.6335046291351318, "rewards_train/1-w": 3.1328349113464355, "rewards_train/2-2": 1.7920126914978027, "rewards_train/2-w": 1.2812669277191162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.766339540481567, "rewards_train/margins_1": 2.8992486000061035, "rewards_train/margins_2": 0.5107457637786865, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -178.92344665527344, "logps_train/policy_1_l": -193.83999633789062, "logps_train/policy_1_w": -154.78269958496094, "logps_train/policy_2_2": -138.60862731933594, "logps_train/policy_2_w": -201.39173889160156, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.3185927867889404, "rewards_train/1-l": -2.2160301208496094, "rewards_train/1-w": 2.8709490299224854, "rewards_train/2-2": 2.7282004356384277, "rewards_train/2-w": 1.3612161874771118, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.086979150772095, "rewards_train/margins_1": 1.552356243133545, "rewards_train/margins_2": 1.366984248161316, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -232.3795166015625, "logps_train/policy_1_l": -246.97915649414062, "logps_train/policy_1_w": -193.39999389648438, "logps_train/policy_2_2": -187.81968688964844, "logps_train/policy_2_w": -234.2555389404297, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 0.7401725053787231, "rewards_train/1-l": -2.060417413711548, "rewards_train/1-w": 3.9428141117095947, "rewards_train/2-2": 2.3461556434631348, "rewards_train/2-w": 2.801008462905884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.003231525421143, "rewards_train/margins_1": 3.2026416063308716, "rewards_train/margins_2": -0.454852819442749, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -133.8706512451172, "logps_train/policy_1_l": -157.17660522460938, "logps_train/policy_1_w": -57.884986877441406, "logps_train/policy_2_2": -102.28140258789062, "logps_train/policy_2_w": -72.96176147460938, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -70.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.22543512284755707, "rewards_train/1-l": -2.112192392349243, "rewards_train/1-w": 1.2165793180465698, "rewards_train/2-2": 1.2312341928482056, "rewards_train/2-w": 1.0038236379623413, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.328771710395813, "rewards_train/margins_1": 0.9911441951990128, "rewards_train/margins_2": 0.22741055488586426, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -207.4850616455078, "logps_train/policy_1_l": -204.59896850585938, "logps_train/policy_1_w": -110.8365707397461, "logps_train/policy_2_2": -154.63502502441406, "logps_train/policy_2_w": -138.42864990234375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.8327425122261047, "rewards_train/1-l": -2.0536458492279053, "rewards_train/1-w": 2.0444676876068115, "rewards_train/2-2": 2.6599349975585938, "rewards_train/2-w": 1.36338472366333, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.098113536834717, "rewards_train/margins_1": 1.2117251753807068, "rewards_train/margins_2": 1.2965502738952637, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -116.59236907958984, "logps_train/policy_1_l": -129.61111450195312, "logps_train/policy_1_w": -91.05878448486328, "logps_train/policy_2_2": -96.36988830566406, "logps_train/policy_2_w": -116.03434753417969, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8876383900642395, "rewards_train/1-l": -1.6027133464813232, "rewards_train/1-w": 1.6300592422485352, "rewards_train/2-2": 1.3786365985870361, "rewards_train/2-w": 0.7075029611587524, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2327725887298584, "rewards_train/margins_1": 0.7424208521842957, "rewards_train/margins_2": 0.6711336374282837, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -247.13876342773438, "logps_train/policy_1_l": -228.509765625, "logps_train/policy_1_w": -155.75, "logps_train/policy_2_2": -206.50143432617188, "logps_train/policy_2_w": -196.0211944580078, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.2290921211242676, "rewards_train/1-l": -1.8130872249603271, "rewards_train/1-w": 2.801563262939453, "rewards_train/2-2": 3.534231424331665, "rewards_train/2-w": 1.1080363988876343, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.61465048789978, "rewards_train/margins_1": 0.5724711418151855, "rewards_train/margins_2": 2.4261950254440308, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -171.4491424560547, "logps_train/policy_1_l": -205.65969848632812, "logps_train/policy_1_w": -162.3141326904297, "logps_train/policy_2_2": -130.52098083496094, "logps_train/policy_2_w": -209.81298828125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.2113358974456787, "rewards_train/1-l": -2.243607521057129, "rewards_train/1-w": 2.2217109203338623, "rewards_train/2-2": 2.3322768211364746, "rewards_train/2-w": -0.0594235360622406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.465318441390991, "rewards_train/margins_1": 1.0103750228881836, "rewards_train/margins_2": 2.391700357198715, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -158.29598999023438, "logps_train/policy_1_l": -127.57991027832031, "logps_train/policy_1_w": -157.2606964111328, "logps_train/policy_2_2": -130.69448852539062, "logps_train/policy_2_w": -187.29847717285156, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.252433180809021, "rewards_train/1-l": -0.9927566051483154, "rewards_train/1-w": 2.620805263519287, "rewards_train/2-2": 1.8778175115585327, "rewards_train/2-w": 1.6264023780822754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6135618686676025, "rewards_train/margins_1": 1.3683720827102661, "rewards_train/margins_2": 0.2514151334762573, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -130.29364013671875, "logps_train/policy_1_l": -102.21290588378906, "logps_train/policy_1_w": -76.27800750732422, "logps_train/policy_2_2": -94.55599975585938, "logps_train/policy_2_w": -95.36763000488281, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 1.0167295932769775, "rewards_train/1-l": -0.6076180338859558, "rewards_train/1-w": 1.0659490823745728, "rewards_train/2-2": 2.615884780883789, "rewards_train/2-w": 0.755423903465271, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.6735671162605286, "rewards_train/margins_1": 0.049219489097595215, "rewards_train/margins_2": 1.860460877418518, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -165.83816528320312, "logps_train/policy_1_l": -140.89874267578125, "logps_train/policy_1_w": -142.65292358398438, "logps_train/policy_2_2": -140.54666137695312, "logps_train/policy_2_w": -179.99847412109375, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.442745566368103, "rewards_train/1-l": -1.1656553745269775, "rewards_train/1-w": 3.341348648071289, "rewards_train/2-2": 2.394162654876709, "rewards_train/2-w": 2.51694917678833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.507004022598267, "rewards_train/margins_1": 1.898603081703186, "rewards_train/margins_2": -0.1227865219116211, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -196.28298950195312, "logps_train/policy_1_l": -220.44920349121094, "logps_train/policy_1_w": -187.41558837890625, "logps_train/policy_2_2": -159.5897674560547, "logps_train/policy_2_w": -228.074951171875, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.1166234016418457, "rewards_train/1-l": -1.1699199676513672, "rewards_train/1-w": 2.874065637588501, "rewards_train/2-2": 3.294539451599121, "rewards_train/2-w": 1.487816333770752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.043985605239868, "rewards_train/margins_1": 0.7574422359466553, "rewards_train/margins_2": 1.8067231178283691, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -154.81423950195312, "logps_train/policy_1_l": -126.48422241210938, "logps_train/policy_1_w": -115.33430480957031, "logps_train/policy_2_2": -120.07840728759766, "logps_train/policy_2_w": -149.6334228515625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.9185750484466553, "rewards_train/1-l": -1.3876804113388062, "rewards_train/1-w": 1.587273120880127, "rewards_train/2-2": 1.9866900444030762, "rewards_train/2-w": 0.8140007257461548, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.974953532218933, "rewards_train/margins_1": 0.6686980724334717, "rewards_train/margins_2": 1.1726893186569214, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -154.0074462890625, "logps_train/policy_1_l": -162.78665161132812, "logps_train/policy_1_w": -146.2003936767578, "logps_train/policy_2_2": -120.82914733886719, "logps_train/policy_2_w": -192.61221313476562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.6840211153030396, "rewards_train/1-l": -0.9630401134490967, "rewards_train/1-w": 3.0080857276916504, "rewards_train/2-2": 1.4674761295318604, "rewards_train/2-w": 1.4262794256210327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.971125841140747, "rewards_train/margins_1": 2.324064612388611, "rewards_train/margins_2": 0.04119670391082764, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -184.86244201660156, "logps_train/policy_1_l": -177.63462829589844, "logps_train/policy_1_w": -99.93515014648438, "logps_train/policy_2_2": -143.615234375, "logps_train/policy_2_w": -136.1728515625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.6594585180282593, "rewards_train/1-l": -1.8593615293502808, "rewards_train/1-w": 2.3783602714538574, "rewards_train/2-2": 2.9126954078674316, "rewards_train/2-w": 1.3542001247406006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.237721800804138, "rewards_train/margins_1": 0.7189017534255981, "rewards_train/margins_2": 1.558495283126831, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -145.07847595214844, "logps_train/policy_1_l": -209.11073303222656, "logps_train/policy_1_w": -107.81889343261719, "logps_train/policy_2_2": -111.06405639648438, "logps_train/policy_2_w": -138.83876037597656, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.253090500831604, "rewards_train/1-l": -3.07474422454834, "rewards_train/1-w": 1.736860752105713, "rewards_train/2-2": 2.267812728881836, "rewards_train/2-w": 1.0286242961883545, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.811604976654053, "rewards_train/margins_1": 0.4837702512741089, "rewards_train/margins_2": 1.2391884326934814, "step": 247 }, { "epoch": 0.74, "learning_rate": 3.728550205580564e-06, "loss": 0.8747, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -205.53878784179688, "logps_train/policy_1_l": -178.39620971679688, "logps_train/policy_1_w": -143.458251953125, "logps_train/policy_2_2": -171.42140197753906, "logps_train/policy_2_w": -177.60882568359375, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 0.37268418073654175, "rewards_train/1-l": -1.5169644355773926, "rewards_train/1-w": 3.096362352371216, "rewards_train/2-2": 1.358250617980957, "rewards_train/2-w": 2.337555170059204, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.613326787948608, "rewards_train/margins_1": 2.723678171634674, "rewards_train/margins_2": -0.9793045520782471, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -181.21701049804688, "logps_train/policy_1_l": -128.26876831054688, "logps_train/policy_1_w": -194.68606567382812, "logps_train/policy_2_2": -144.60169982910156, "logps_train/policy_2_w": -243.83584594726562, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 1.1596477031707764, "rewards_train/1-l": -0.7899621725082397, "rewards_train/1-w": 3.5877413749694824, "rewards_train/2-2": 2.512876510620117, "rewards_train/2-w": 1.8410245180130005, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.377703547477722, "rewards_train/margins_1": 2.428093671798706, "rewards_train/margins_2": 0.6718519926071167, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -135.05661010742188, "logps_train/policy_1_l": -167.1341094970703, "logps_train/policy_1_w": -93.72624969482422, "logps_train/policy_2_2": -104.88470458984375, "logps_train/policy_2_w": -142.1809844970703, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.44355711340904236, "rewards_train/1-l": -2.479719638824463, "rewards_train/1-w": 1.8367494344711304, "rewards_train/2-2": 1.4763731956481934, "rewards_train/2-w": 0.6506521701812744, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.316469073295593, "rewards_train/margins_1": 1.393192321062088, "rewards_train/margins_2": 0.825721025466919, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -135.29139709472656, "logps_train/policy_1_l": -174.883544921875, "logps_train/policy_1_w": -94.3629379272461, "logps_train/policy_2_2": -98.47075653076172, "logps_train/policy_2_w": -120.69725799560547, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.0989853143692017, "rewards_train/1-l": -1.6071035861968994, "rewards_train/1-w": 1.6096047163009644, "rewards_train/2-2": 1.804486632347107, "rewards_train/2-w": 1.0048837661743164, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2167083024978638, "rewards_train/margins_1": 0.5106194019317627, "rewards_train/margins_2": 0.7996028661727905, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -161.58895874023438, "logps_train/policy_1_l": -121.7886962890625, "logps_train/policy_1_w": -147.14822387695312, "logps_train/policy_2_2": -122.66358947753906, "logps_train/policy_2_w": -182.92538452148438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.2516523599624634, "rewards_train/1-l": -1.9609013795852661, "rewards_train/1-w": 2.1547093391418457, "rewards_train/2-2": 2.4094228744506836, "rewards_train/2-w": 0.6012118458747864, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.115610718727112, "rewards_train/margins_1": 0.9030569791793823, "rewards_train/margins_2": 1.8082110285758972, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -153.25869750976562, "logps_train/policy_1_l": -180.93817138671875, "logps_train/policy_1_w": -100.64481353759766, "logps_train/policy_2_2": -122.9389877319336, "logps_train/policy_2_w": -122.1507568359375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.8096274137496948, "rewards_train/1-l": -2.236443042755127, "rewards_train/1-w": 1.7315635681152344, "rewards_train/2-2": 1.8650853633880615, "rewards_train/2-w": 1.1849240064620972, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9680066108703613, "rewards_train/margins_1": 0.9219361543655396, "rewards_train/margins_2": 0.6801613569259644, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -129.32650756835938, "logps_train/policy_1_l": -213.03793334960938, "logps_train/policy_1_w": -162.81471252441406, "logps_train/policy_2_2": -111.41537475585938, "logps_train/policy_2_w": -192.56040954589844, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.0360996723175049, "rewards_train/1-l": -2.731917142868042, "rewards_train/1-w": 2.943528652191162, "rewards_train/2-2": 1.5490872859954834, "rewards_train/2-w": 2.2314586639404297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 5.675445795059204, "rewards_train/margins_1": 1.9074289798736572, "rewards_train/margins_2": -0.6823713779449463, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -218.85186767578125, "logps_train/policy_1_l": -185.7195281982422, "logps_train/policy_1_w": -162.08941650390625, "logps_train/policy_2_2": -184.43109130859375, "logps_train/policy_2_w": -198.73550415039062, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8460628986358643, "rewards_train/1-l": -1.421952724456787, "rewards_train/1-w": 2.6644961833953857, "rewards_train/2-2": 3.0693912506103516, "rewards_train/2-w": 1.8967628479003906, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.086448907852173, "rewards_train/margins_1": 0.8184332847595215, "rewards_train/margins_2": 1.172628402709961, "step": 248 }, { "epoch": 0.75, "logps_train/policy_1_2": -98.81273651123047, "logps_train/policy_1_l": -57.65885925292969, "logps_train/policy_1_w": -88.20829772949219, "logps_train/policy_2_2": -75.78584289550781, "logps_train/policy_2_w": -117.86509704589844, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -50.25, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.9968517422676086, "rewards_train/1-l": -0.7207690477371216, "rewards_train/1-w": 1.9045600891113281, "rewards_train/2-2": 1.6612595319747925, "rewards_train/2-w": 1.2326306104660034, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6253291368484497, "rewards_train/margins_1": 0.9077083468437195, "rewards_train/margins_2": 0.42862892150878906, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -208.39706420898438, "logps_train/policy_1_l": -269.162109375, "logps_train/policy_1_w": -167.5537872314453, "logps_train/policy_2_2": -183.6974334716797, "logps_train/policy_2_w": -198.40562438964844, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.618105888366699, "rewards_train/1-l": -2.8749032020568848, "rewards_train/1-w": 3.157121419906616, "rewards_train/2-2": 3.532209634780884, "rewards_train/2-w": 1.9031871557235718, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.032024621963501, "rewards_train/margins_1": 0.539015531539917, "rewards_train/margins_2": 1.629022479057312, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -211.8912353515625, "logps_train/policy_1_l": -187.29322814941406, "logps_train/policy_1_w": -120.77594757080078, "logps_train/policy_2_2": -167.43951416015625, "logps_train/policy_2_w": -157.29544067382812, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.8390012979507446, "rewards_train/1-l": -2.003932476043701, "rewards_train/1-w": 2.0345139503479004, "rewards_train/2-2": 3.4169869422912598, "rewards_train/2-w": 0.9509244561195374, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.038446426391602, "rewards_train/margins_1": 0.19551265239715576, "rewards_train/margins_2": 2.4660624861717224, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -119.28501892089844, "logps_train/policy_1_l": -145.29579162597656, "logps_train/policy_1_w": -128.72076416015625, "logps_train/policy_2_2": -100.64527893066406, "logps_train/policy_2_w": -148.1242218017578, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.686342179775238, "rewards_train/1-l": -1.3424700498580933, "rewards_train/1-w": 1.7990177869796753, "rewards_train/2-2": 1.1850812435150146, "rewards_train/2-w": 1.1742960214614868, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1414878368377686, "rewards_train/margins_1": 1.1126756072044373, "rewards_train/margins_2": 0.010785222053527832, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -185.1118621826172, "logps_train/policy_1_l": -183.127197265625, "logps_train/policy_1_w": -138.79470825195312, "logps_train/policy_2_2": -160.71878051757812, "logps_train/policy_2_w": -166.85699462890625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0724071264266968, "rewards_train/1-l": -2.210376739501953, "rewards_train/1-w": 2.4486541748046875, "rewards_train/2-2": 1.9492149353027344, "rewards_train/2-w": 1.536176323890686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.659030914306641, "rewards_train/margins_1": 1.3762470483779907, "rewards_train/margins_2": 0.41303861141204834, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -115.28459167480469, "logps_train/policy_1_l": -166.78546142578125, "logps_train/policy_1_w": -75.40277099609375, "logps_train/policy_2_2": -95.97410583496094, "logps_train/policy_2_w": -89.94203186035156, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -91.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.9879472851753235, "rewards_train/1-l": -1.8398730754852295, "rewards_train/1-w": 1.60640287399292, "rewards_train/2-2": 1.670557975769043, "rewards_train/2-w": 1.3171249628067017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4462759494781494, "rewards_train/margins_1": 0.6184555888175964, "rewards_train/margins_2": 0.3534330129623413, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -156.31402587890625, "logps_train/policy_1_l": -146.1398468017578, "logps_train/policy_1_w": -96.01115417480469, "logps_train/policy_2_2": -128.90695190429688, "logps_train/policy_2_w": -131.49020385742188, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 2.0623486042022705, "rewards_train/1-l": -1.948359727859497, "rewards_train/1-w": 2.0981035232543945, "rewards_train/2-2": 2.7968039512634277, "rewards_train/2-w": 1.0306674242019653, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.046463251113892, "rewards_train/margins_1": 0.03575491905212402, "rewards_train/margins_2": 1.7661365270614624, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -249.36453247070312, "logps_train/policy_1_l": -271.8609313964844, "logps_train/policy_1_w": -190.5804443359375, "logps_train/policy_2_2": -194.32858276367188, "logps_train/policy_2_w": -242.5518035888672, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -245.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.8112014532089233, "rewards_train/1-l": -2.7423431873321533, "rewards_train/1-w": 2.9810173511505127, "rewards_train/2-2": 3.9897990226745605, "rewards_train/2-w": 1.6862258911132812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.723360538482666, "rewards_train/margins_1": 1.1698158979415894, "rewards_train/margins_2": 2.3035731315612793, "step": 249 }, { "epoch": 0.75, "learning_rate": 3.7069805068268626e-06, "loss": 0.7303, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -87.89281463623047, "logps_train/policy_1_l": -52.2573127746582, "logps_train/policy_1_w": -62.87443542480469, "logps_train/policy_2_2": -64.61058807373047, "logps_train/policy_2_w": -92.37873840332031, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -53.75, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 0.4911872148513794, "rewards_train/1-l": 0.14458142220973969, "rewards_train/1-w": 1.2637284994125366, "rewards_train/2-2": 0.9615970849990845, "rewards_train/2-w": 0.425699919462204, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.119147077202797, "rewards_train/margins_1": 0.7725412845611572, "rewards_train/margins_2": 0.5358971655368805, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -111.53617095947266, "logps_train/policy_1_l": -139.828857421875, "logps_train/policy_1_w": -152.264892578125, "logps_train/policy_2_2": -90.2686538696289, "logps_train/policy_2_w": -189.02587890625, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.0866179466247559, "rewards_train/1-l": -1.7957760095596313, "rewards_train/1-w": 2.732886791229248, "rewards_train/2-2": 1.4020402431488037, "rewards_train/2-w": 1.2872555255889893, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.528662800788879, "rewards_train/margins_1": 1.6462688446044922, "rewards_train/margins_2": 0.11478471755981445, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -125.21565246582031, "logps_train/policy_1_l": -127.3527603149414, "logps_train/policy_1_w": -99.71873474121094, "logps_train/policy_2_2": -97.99372863769531, "logps_train/policy_2_w": -131.42611694335938, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.8299964666366577, "rewards_train/1-l": -1.391233205795288, "rewards_train/1-w": 2.1695327758789062, "rewards_train/2-2": 2.5451583862304688, "rewards_train/2-w": 1.4050447940826416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5607659816741943, "rewards_train/margins_1": 0.33953630924224854, "rewards_train/margins_2": 1.1401135921478271, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -194.4123992919922, "logps_train/policy_1_l": -240.5574951171875, "logps_train/policy_1_w": -150.100341796875, "logps_train/policy_2_2": -155.2071533203125, "logps_train/policy_2_w": -204.88621520996094, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.2212597131729126, "rewards_train/1-l": -1.6787968873977661, "rewards_train/1-w": 2.44699764251709, "rewards_train/2-2": 2.3074092864990234, "rewards_train/2-w": 1.4645038843154907, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.125794529914856, "rewards_train/margins_1": 1.2257379293441772, "rewards_train/margins_2": 0.8429054021835327, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -155.04971313476562, "logps_train/policy_1_l": -111.67194366455078, "logps_train/policy_1_w": -239.21437072753906, "logps_train/policy_2_2": -126.12981414794922, "logps_train/policy_2_w": -282.7441101074219, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": 1.2122162580490112, "rewards_train/1-l": -0.29444047808647156, "rewards_train/1-w": 2.5134263038635254, "rewards_train/2-2": 1.8069404363632202, "rewards_train/2-w": 0.6927782297134399, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.807866781949997, "rewards_train/margins_1": 1.3012100458145142, "rewards_train/margins_2": 1.1141622066497803, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -209.0548858642578, "logps_train/policy_1_l": -138.19390869140625, "logps_train/policy_1_w": -144.2606201171875, "logps_train/policy_2_2": -174.1654510498047, "logps_train/policy_2_w": -176.10354614257812, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5163850784301758, "rewards_train/1-l": -0.5693912506103516, "rewards_train/1-w": 3.589561939239502, "rewards_train/2-2": 2.7920494079589844, "rewards_train/2-w": 2.629487991333008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.1589531898498535, "rewards_train/margins_1": 2.073176860809326, "rewards_train/margins_2": 0.16256141662597656, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -222.9381866455078, "logps_train/policy_1_l": -241.57772827148438, "logps_train/policy_1_w": -182.05502319335938, "logps_train/policy_2_2": -190.83718872070312, "logps_train/policy_2_w": -222.96871948242188, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 2.159306287765503, "rewards_train/1-l": -1.4400012493133545, "rewards_train/1-w": 3.436685800552368, "rewards_train/2-2": 3.317844867706299, "rewards_train/2-w": 2.0843772888183594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.876687049865723, "rewards_train/margins_1": 1.2773795127868652, "rewards_train/margins_2": 1.2334675788879395, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -193.8896484375, "logps_train/policy_1_l": -228.82000732421875, "logps_train/policy_1_w": -142.5055389404297, "logps_train/policy_2_2": -152.4368896484375, "logps_train/policy_2_w": -185.70703125, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 0.9797852039337158, "rewards_train/1-l": -3.3136415481567383, "rewards_train/1-w": 3.227571487426758, "rewards_train/2-2": 1.90631103515625, "rewards_train/2-w": 1.5417964458465576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.541213035583496, "rewards_train/margins_1": 2.247786283493042, "rewards_train/margins_2": 0.3645145893096924, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -143.19500732421875, "logps_train/policy_1_l": -134.46127319335938, "logps_train/policy_1_w": -82.40151977539062, "logps_train/policy_2_2": -125.19845581054688, "logps_train/policy_2_w": -95.2054672241211, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.268779993057251, "rewards_train/1-l": -0.5312834978103638, "rewards_train/1-w": 1.6645352840423584, "rewards_train/2-2": 1.9832789897918701, "rewards_train/2-w": 1.4255473613739014, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.195818781852722, "rewards_train/margins_1": 0.3957552909851074, "rewards_train/margins_2": 0.5577316284179688, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -139.27523803710938, "logps_train/policy_1_l": -266.6151428222656, "logps_train/policy_1_w": -132.97488403320312, "logps_train/policy_2_2": -120.15056610107422, "logps_train/policy_2_w": -172.1066436767578, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.281459927558899, "rewards_train/1-l": -3.785928726196289, "rewards_train/1-w": 1.9991909265518188, "rewards_train/2-2": 1.8132635354995728, "rewards_train/2-w": 1.1901177167892456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.785119652748108, "rewards_train/margins_1": 0.7177309989929199, "rewards_train/margins_2": 0.6231458187103271, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -79.22897338867188, "logps_train/policy_1_l": -80.29483032226562, "logps_train/policy_1_w": -67.5395278930664, "logps_train/policy_2_2": -61.983970642089844, "logps_train/policy_2_w": -91.24671173095703, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.6189001798629761, "rewards_train/1-l": -0.5669833421707153, "rewards_train/1-w": 1.4851100444793701, "rewards_train/2-2": 1.2439855337142944, "rewards_train/2-w": 0.637829065322876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.0520933866500854, "rewards_train/margins_1": 0.866209864616394, "rewards_train/margins_2": 0.6061564683914185, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -220.6513671875, "logps_train/policy_1_l": -132.6372528076172, "logps_train/policy_1_w": -136.5806884765625, "logps_train/policy_2_2": -191.6943359375, "logps_train/policy_2_w": -155.71694946289062, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.4551769495010376, "rewards_train/1-l": -0.9702684879302979, "rewards_train/1-w": 1.675915002822876, "rewards_train/2-2": 2.552440643310547, "rewards_train/2-w": 1.3048673868179321, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.646183490753174, "rewards_train/margins_1": 0.22073805332183838, "rewards_train/margins_2": 1.2475732564926147, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -114.16258239746094, "logps_train/policy_1_l": -107.0750503540039, "logps_train/policy_1_w": -75.03874969482422, "logps_train/policy_2_2": -86.47949981689453, "logps_train/policy_2_w": -95.81733703613281, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.2771015167236328, "rewards_train/1-l": -1.0160987377166748, "rewards_train/1-w": 2.04378080368042, "rewards_train/2-2": 2.2676749229431152, "rewards_train/2-w": 1.5678761005401611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0598795413970947, "rewards_train/margins_1": 0.7666792869567871, "rewards_train/margins_2": 0.6997988224029541, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -159.474365234375, "logps_train/policy_1_l": -163.78619384765625, "logps_train/policy_1_w": -151.75416564941406, "logps_train/policy_2_2": -128.5880126953125, "logps_train/policy_2_w": -192.82284545898438, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.5619378089904785, "rewards_train/1-l": -1.9200249910354614, "rewards_train/1-w": 2.5034897327423096, "rewards_train/2-2": 2.0630745887756348, "rewards_train/2-w": 1.4286532402038574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.423514723777771, "rewards_train/margins_1": 0.941551923751831, "rewards_train/margins_2": 0.6344213485717773, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -62.181419372558594, "logps_train/policy_1_l": -109.11258697509766, "logps_train/policy_1_w": -103.40374755859375, "logps_train/policy_2_2": -44.61912536621094, "logps_train/policy_2_w": -125.96161651611328, "logps_train/ref_1_2": -67.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -54.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.507639467716217, "rewards_train/1-l": -1.303446888923645, "rewards_train/1-w": 1.7932178974151611, "rewards_train/2-2": 0.9564469456672668, "rewards_train/2-w": 0.8225885629653931, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.096664786338806, "rewards_train/margins_1": 1.285578429698944, "rewards_train/margins_2": 0.13385838270187378, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -157.72059631347656, "logps_train/policy_1_l": -152.81076049804688, "logps_train/policy_1_w": -155.856201171875, "logps_train/policy_2_2": -128.80419921875, "logps_train/policy_2_w": -179.32229614257812, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.2935655117034912, "rewards_train/1-l": -1.244357705116272, "rewards_train/1-w": 2.601879358291626, "rewards_train/2-2": 2.0102055072784424, "rewards_train/2-w": 1.717771053314209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.846237063407898, "rewards_train/margins_1": 1.3083138465881348, "rewards_train/margins_2": 0.2924344539642334, "step": 251 }, { "epoch": 0.75, "learning_rate": 3.685293008903471e-06, "loss": 0.7828, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -250.47499084472656, "logps_train/policy_1_l": -179.2735595703125, "logps_train/policy_1_w": -162.74472045898438, "logps_train/policy_2_2": -205.3859100341797, "logps_train/policy_2_w": -195.13784790039062, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.515001654624939, "rewards_train/1-l": -1.4125114679336548, "rewards_train/1-w": 2.898184061050415, "rewards_train/2-2": 3.3020339012145996, "rewards_train/2-w": 1.9737151861190796, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.31069552898407, "rewards_train/margins_1": 1.383182406425476, "rewards_train/margins_2": 1.32831871509552, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -197.31130981445312, "logps_train/policy_1_l": -196.87432861328125, "logps_train/policy_1_w": -166.53793334960938, "logps_train/policy_2_2": -160.36090087890625, "logps_train/policy_2_w": -207.49557495117188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.1901583671569824, "rewards_train/1-l": -2.1795222759246826, "rewards_train/1-w": 1.367692232131958, "rewards_train/2-2": 2.409797191619873, "rewards_train/2-w": 0.729934811592102, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5472145080566406, "rewards_train/margins_1": 0.17753386497497559, "rewards_train/margins_2": 1.679862380027771, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -170.303955078125, "logps_train/policy_1_l": -108.19194030761719, "logps_train/policy_1_w": -81.87667846679688, "logps_train/policy_2_2": -133.4104766845703, "logps_train/policy_2_w": -97.07061767578125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.7235099077224731, "rewards_train/1-l": -0.8687056303024292, "rewards_train/1-w": 1.614675521850586, "rewards_train/2-2": 2.226139545440674, "rewards_train/2-w": 1.2027037143707275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.483381152153015, "rewards_train/margins_1": 0.8911656141281128, "rewards_train/margins_2": 1.0234358310699463, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -148.81405639648438, "logps_train/policy_1_l": -150.77951049804688, "logps_train/policy_1_w": -142.74169921875, "logps_train/policy_2_2": -124.36465454101562, "logps_train/policy_2_w": -188.49285888671875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.0865635871887207, "rewards_train/1-l": -0.8720906972885132, "rewards_train/1-w": 2.4539546966552734, "rewards_train/2-2": 1.8854104280471802, "rewards_train/2-w": 1.2491511106491089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3260453939437866, "rewards_train/margins_1": 1.3673911094665527, "rewards_train/margins_2": 0.6362593173980713, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -137.91465759277344, "logps_train/policy_1_l": -179.4856414794922, "logps_train/policy_1_w": -146.2947235107422, "logps_train/policy_2_2": -115.86683654785156, "logps_train/policy_2_w": -172.77015686035156, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.8163466453552246, "rewards_train/1-l": -2.188603401184082, "rewards_train/1-w": 2.454902172088623, "rewards_train/2-2": 2.460191011428833, "rewards_train/2-w": 1.6214218139648438, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.643505573272705, "rewards_train/margins_1": 0.6385555267333984, "rewards_train/margins_2": 0.8387691974639893, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -140.83209228515625, "logps_train/policy_1_l": -181.81304931640625, "logps_train/policy_1_w": -72.79074096679688, "logps_train/policy_2_2": -109.15846252441406, "logps_train/policy_2_w": -92.8839340209961, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.5675724744796753, "rewards_train/1-l": -1.052203893661499, "rewards_train/1-w": 1.1373324394226074, "rewards_train/2-2": 1.443528175354004, "rewards_train/2-w": 0.5299656987190247, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1895363330841064, "rewards_train/margins_1": 0.5697599649429321, "rewards_train/margins_2": 0.9135624766349792, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -120.97235107421875, "logps_train/policy_1_l": -47.128074645996094, "logps_train/policy_1_w": -95.5667724609375, "logps_train/policy_2_2": -91.56098937988281, "logps_train/policy_2_w": -121.88536834716797, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -46.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.9390932321548462, "rewards_train/1-l": -0.11598103493452072, "rewards_train/1-w": 2.1950809955596924, "rewards_train/2-2": 1.9810103178024292, "rewards_train/2-w": 1.4817752838134766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.311062030494213, "rewards_train/margins_1": 1.2559877634048462, "rewards_train/margins_2": 0.49923503398895264, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -168.17129516601562, "logps_train/policy_1_l": -170.00567626953125, "logps_train/policy_1_w": -98.70440673828125, "logps_train/policy_2_2": -122.5935287475586, "logps_train/policy_2_w": -128.13453674316406, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.8078710436820984, "rewards_train/1-l": -2.0849430561065674, "rewards_train/1-w": 1.4975281953811646, "rewards_train/2-2": 1.8015849590301514, "rewards_train/2-w": 0.7302958965301514, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.582471251487732, "rewards_train/margins_1": 0.6896571516990662, "rewards_train/margins_2": 1.0712890625, "step": 252 }, { "epoch": 0.76, "logps_train/policy_1_2": -71.96492767333984, "logps_train/policy_1_l": -100.69056701660156, "logps_train/policy_1_w": -51.593196868896484, "logps_train/policy_2_2": -47.61973571777344, "logps_train/policy_2_w": -94.69339752197266, "logps_train/ref_1_2": -83.5, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 1.1519447565078735, "rewards_train/1-l": -0.856947660446167, "rewards_train/1-w": 1.416656732559204, "rewards_train/2-2": 1.7450575828552246, "rewards_train/2-w": 0.5462852716445923, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.273604393005371, "rewards_train/margins_1": 0.26471197605133057, "rewards_train/margins_2": 1.1987723112106323, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -223.218994140625, "logps_train/policy_1_l": -152.19976806640625, "logps_train/policy_1_w": -88.68041229248047, "logps_train/policy_2_2": -194.41867065429688, "logps_train/policy_2_w": -109.65983581542969, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.242163062095642, "rewards_train/1-l": -1.4211487770080566, "rewards_train/1-w": 2.0022714138031006, "rewards_train/2-2": 2.6206324100494385, "rewards_train/2-w": 1.3355785608291626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4234201908111572, "rewards_train/margins_1": 0.7601083517074585, "rewards_train/margins_2": 1.2850538492202759, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -76.4447021484375, "logps_train/policy_1_l": -59.10725402832031, "logps_train/policy_1_w": -55.584896087646484, "logps_train/policy_2_2": -58.829063415527344, "logps_train/policy_2_w": -69.50625610351562, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -77.5, "rewards_train/1-2": 0.8625612258911133, "rewards_train/1-l": -0.8596514463424683, "rewards_train/1-w": 1.2848694324493408, "rewards_train/2-2": 1.1374058723449707, "rewards_train/2-w": 0.7962497472763062, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.144520878791809, "rewards_train/margins_1": 0.42230820655822754, "rewards_train/margins_2": 0.34115612506866455, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -101.3957748413086, "logps_train/policy_1_l": -141.4744415283203, "logps_train/policy_1_w": -64.53058624267578, "logps_train/policy_2_2": -83.69164276123047, "logps_train/policy_2_w": -77.93792724609375, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 0.8291727900505066, "rewards_train/1-l": -1.6186844110488892, "rewards_train/1-w": 1.0844417810440063, "rewards_train/2-2": 1.1066170930862427, "rewards_train/2-w": 0.5569880604743958, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7031261920928955, "rewards_train/margins_1": 0.25526899099349976, "rewards_train/margins_2": 0.5496290326118469, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -187.82102966308594, "logps_train/policy_1_l": -195.4699249267578, "logps_train/policy_1_w": -135.91226196289062, "logps_train/policy_2_2": -141.41773986816406, "logps_train/policy_2_w": -177.08251953125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.91789710521698, "rewards_train/1-l": -1.753048062324524, "rewards_train/1-w": 2.6337733268737793, "rewards_train/2-2": 2.167600631713867, "rewards_train/2-w": 1.1448729038238525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.386821389198303, "rewards_train/margins_1": 1.7158762216567993, "rewards_train/margins_2": 1.0227277278900146, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -184.40057373046875, "logps_train/policy_1_l": -179.96209716796875, "logps_train/policy_1_w": -208.41893005371094, "logps_train/policy_2_2": -155.56094360351562, "logps_train/policy_2_w": -259.1070861816406, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.4255671501159668, "rewards_train/1-l": -0.7258963584899902, "rewards_train/1-w": 3.386232376098633, "rewards_train/2-2": 2.248594284057617, "rewards_train/2-w": 2.179915428161621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.112128734588623, "rewards_train/margins_1": 1.960665225982666, "rewards_train/margins_2": 0.0686788558959961, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -220.67774963378906, "logps_train/policy_1_l": -268.1158752441406, "logps_train/policy_1_w": -279.1183776855469, "logps_train/policy_2_2": -172.1522216796875, "logps_train/policy_2_w": -330.194091796875, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -246.0, "logps_train/ref_1_w": -308.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -340.0, "rewards_train/1-2": 1.4134753942489624, "rewards_train/1-l": -2.1959633827209473, "rewards_train/1-w": 2.910426139831543, "rewards_train/2-2": 3.272278308868408, "rewards_train/2-w": 0.9571537971496582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.10638952255249, "rewards_train/margins_1": 1.4969507455825806, "rewards_train/margins_2": 2.31512451171875, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -94.02911376953125, "logps_train/policy_1_l": -104.04365539550781, "logps_train/policy_1_w": -109.26991271972656, "logps_train/policy_2_2": -76.39813232421875, "logps_train/policy_2_w": -139.15286254882812, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.0330259799957275, "rewards_train/1-l": -1.5176465511322021, "rewards_train/1-w": 2.67574405670166, "rewards_train/2-2": 1.4566713571548462, "rewards_train/2-w": 1.7737767696380615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.193390607833862, "rewards_train/margins_1": 1.6427180767059326, "rewards_train/margins_2": -0.31710541248321533, "step": 253 }, { "epoch": 0.76, "learning_rate": 3.6634898284719533e-06, "loss": 0.7699, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -216.6494140625, "logps_train/policy_1_l": -212.30438232421875, "logps_train/policy_1_w": -153.90725708007812, "logps_train/policy_2_2": -172.57798767089844, "logps_train/policy_2_w": -207.979248046875, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.4884768724441528, "rewards_train/1-l": -2.281024932861328, "rewards_train/1-w": 2.3795862197875977, "rewards_train/2-2": 2.698645830154419, "rewards_train/2-w": 0.7458239793777466, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.660611152648926, "rewards_train/margins_1": 0.8911093473434448, "rewards_train/margins_2": 1.9528218507766724, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -178.39736938476562, "logps_train/policy_1_l": -199.85107421875, "logps_train/policy_1_w": -189.73358154296875, "logps_train/policy_2_2": -147.42276000976562, "logps_train/policy_2_w": -221.86643981933594, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.4483494758605957, "rewards_train/1-l": -1.4720228910446167, "rewards_train/1-w": 2.6922683715820312, "rewards_train/2-2": 2.33174729347229, "rewards_train/2-w": 1.7914807796478271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.164291262626648, "rewards_train/margins_1": 1.2439188957214355, "rewards_train/margins_2": 0.5402665138244629, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -214.08062744140625, "logps_train/policy_1_l": -231.09500122070312, "logps_train/policy_1_w": -167.59814453125, "logps_train/policy_2_2": -166.13258361816406, "logps_train/policy_2_w": -225.97354125976562, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 2.15346097946167, "rewards_train/1-l": -2.0175061225891113, "rewards_train/1-w": 2.7429211139678955, "rewards_train/2-2": 3.296116352081299, "rewards_train/2-w": 1.2596765756607056, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.760427236557007, "rewards_train/margins_1": 0.5894601345062256, "rewards_train/margins_2": 2.0364397764205933, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -118.57648468017578, "logps_train/policy_1_l": -120.35174560546875, "logps_train/policy_1_w": -83.18084716796875, "logps_train/policy_2_2": -103.90802764892578, "logps_train/policy_2_w": -102.66606903076172, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.4595386981964111, "rewards_train/1-l": -0.6040214896202087, "rewards_train/1-w": 1.1381654739379883, "rewards_train/2-2": 1.7638850212097168, "rewards_train/2-w": 0.6372994184494019, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.742186963558197, "rewards_train/margins_1": -0.32137322425842285, "rewards_train/margins_2": 1.126585602760315, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -88.50953674316406, "logps_train/policy_1_l": -109.09051513671875, "logps_train/policy_1_w": -72.01969909667969, "logps_train/policy_2_2": -72.09346771240234, "logps_train/policy_2_w": -87.98887634277344, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 0.9990461468696594, "rewards_train/1-l": -1.3485043048858643, "rewards_train/1-w": 1.6359204053878784, "rewards_train/2-2": 1.4078407287597656, "rewards_train/2-w": 1.2749402523040771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.9844247102737427, "rewards_train/margins_1": 0.636874258518219, "rewards_train/margins_2": 0.13290047645568848, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -121.69963073730469, "logps_train/policy_1_l": -143.97738647460938, "logps_train/policy_1_w": -168.562255859375, "logps_train/policy_2_2": -102.7850570678711, "logps_train/policy_2_w": -199.53558349609375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.8003487586975098, "rewards_train/1-l": -1.964869499206543, "rewards_train/1-w": 2.4398677349090576, "rewards_train/2-2": 2.062119960784912, "rewards_train/2-w": 1.3542536497116089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.404737234115601, "rewards_train/margins_1": 0.6395189762115479, "rewards_train/margins_2": 0.7078663110733032, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -163.8765106201172, "logps_train/policy_1_l": -165.13394165039062, "logps_train/policy_1_w": -135.40736389160156, "logps_train/policy_2_2": -139.55145263671875, "logps_train/policy_2_w": -150.58941650390625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.018599033355713, "rewards_train/1-l": -1.3188629150390625, "rewards_train/1-w": 1.3373886346817017, "rewards_train/2-2": 1.4901666641235352, "rewards_train/2-w": 0.8785592317581177, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.656251549720764, "rewards_train/margins_1": 0.31878960132598877, "rewards_train/margins_2": 0.6116074323654175, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -117.6447525024414, "logps_train/policy_1_l": -136.50474548339844, "logps_train/policy_1_w": -122.23179626464844, "logps_train/policy_2_2": -93.95890808105469, "logps_train/policy_2_w": -154.59811401367188, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.154274821281433, "rewards_train/1-l": -0.9957873821258545, "rewards_train/1-w": 2.1795549392700195, "rewards_train/2-2": 1.6994221210479736, "rewards_train/2-w": 1.377689003944397, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.175342321395874, "rewards_train/margins_1": 1.0252801179885864, "rewards_train/margins_2": 0.32173311710357666, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -219.69732666015625, "logps_train/policy_1_l": -251.56309509277344, "logps_train/policy_1_w": -162.89202880859375, "logps_train/policy_2_2": -181.5006561279297, "logps_train/policy_2_w": -208.43905639648438, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.8521428108215332, "rewards_train/1-l": -2.241955041885376, "rewards_train/1-w": 3.0451717376708984, "rewards_train/2-2": 3.4749345779418945, "rewards_train/2-w": 1.2076573371887207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.287126779556274, "rewards_train/margins_1": 1.1930289268493652, "rewards_train/margins_2": 2.267277240753174, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -118.24604034423828, "logps_train/policy_1_l": -144.62918090820312, "logps_train/policy_1_w": -143.27403259277344, "logps_train/policy_2_2": -94.22077941894531, "logps_train/policy_2_w": -181.03565979003906, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.319145917892456, "rewards_train/1-l": -1.033621907234192, "rewards_train/1-w": 2.403846502304077, "rewards_train/2-2": 1.6251881122589111, "rewards_train/2-w": 1.5183091163635254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.437468409538269, "rewards_train/margins_1": 1.084700584411621, "rewards_train/margins_2": 0.10687899589538574, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -137.6580047607422, "logps_train/policy_1_l": -151.83277893066406, "logps_train/policy_1_w": -111.6626968383789, "logps_train/policy_2_2": -97.51893615722656, "logps_train/policy_2_w": -155.50979614257812, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.270137071609497, "rewards_train/1-l": -1.60515296459198, "rewards_train/1-w": 2.359121322631836, "rewards_train/2-2": 2.2992782592773438, "rewards_train/2-w": 1.4029262065887451, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.964274287223816, "rewards_train/margins_1": 1.0889842510223389, "rewards_train/margins_2": 0.8963520526885986, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -130.70692443847656, "logps_train/policy_1_l": -97.13052368164062, "logps_train/policy_1_w": -62.48138427734375, "logps_train/policy_2_2": -101.6658935546875, "logps_train/policy_2_w": -83.39138793945312, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 1.2535268068313599, "rewards_train/1-l": -1.4184240102767944, "rewards_train/1-w": 1.2071354389190674, "rewards_train/2-2": 2.108410358428955, "rewards_train/2-w": 0.4889855682849884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.625559449195862, "rewards_train/margins_1": -0.04639136791229248, "rewards_train/margins_2": 1.6194247901439667, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -192.22674560546875, "logps_train/policy_1_l": -75.0745849609375, "logps_train/policy_1_w": -64.51250457763672, "logps_train/policy_2_2": -145.43179321289062, "logps_train/policy_2_w": -93.63180541992188, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 0.2570132315158844, "rewards_train/1-l": -0.7984740734100342, "rewards_train/1-w": 1.8526561260223389, "rewards_train/2-2": 1.2888522148132324, "rewards_train/2-w": 1.3891633749008179, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.651130199432373, "rewards_train/margins_1": 1.5956428945064545, "rewards_train/margins_2": -0.10031116008758545, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -140.24163818359375, "logps_train/policy_1_l": -149.9635467529297, "logps_train/policy_1_w": -97.08488464355469, "logps_train/policy_2_2": -104.73282623291016, "logps_train/policy_2_w": -133.55203247070312, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.1805245876312256, "rewards_train/1-l": -1.4967446327209473, "rewards_train/1-w": 1.8762767314910889, "rewards_train/2-2": 2.1071860790252686, "rewards_train/2-w": 0.6789772510528564, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.373021364212036, "rewards_train/margins_1": 0.6957521438598633, "rewards_train/margins_2": 1.428208827972412, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -213.66407775878906, "logps_train/policy_1_l": -239.78016662597656, "logps_train/policy_1_w": -191.81747436523438, "logps_train/policy_2_2": -174.19094848632812, "logps_train/policy_2_w": -230.85162353515625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.4554669857025146, "rewards_train/1-l": -2.689931869506836, "rewards_train/1-w": 2.7979390621185303, "rewards_train/2-2": 2.6652798652648926, "rewards_train/2-w": 1.6398375034332275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.487870931625366, "rewards_train/margins_1": 1.3424720764160156, "rewards_train/margins_2": 1.025442361831665, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -148.31484985351562, "logps_train/policy_1_l": -181.02264404296875, "logps_train/policy_1_w": -152.44140625, "logps_train/policy_2_2": -127.25475311279297, "logps_train/policy_2_w": -193.6020965576172, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.3278894424438477, "rewards_train/1-l": -1.3423032760620117, "rewards_train/1-w": 3.339451789855957, "rewards_train/2-2": 1.7796028852462769, "rewards_train/2-w": 2.0749473571777344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.681755065917969, "rewards_train/margins_1": 2.0115623474121094, "rewards_train/margins_2": -0.2953444719314575, "step": 255 }, { "epoch": 0.77, "learning_rate": 3.641573093484283e-06, "loss": 0.7228, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -111.71453857421875, "logps_train/policy_1_l": -118.05293273925781, "logps_train/policy_1_w": -138.82789611816406, "logps_train/policy_2_2": -96.00833129882812, "logps_train/policy_2_w": -162.80087280273438, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.587920904159546, "rewards_train/1-l": -0.9615435600280762, "rewards_train/1-w": 2.3609604835510254, "rewards_train/2-2": 1.9046361446380615, "rewards_train/2-w": 1.3714747428894043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3225040435791016, "rewards_train/margins_1": 0.7730395793914795, "rewards_train/margins_2": 0.5331614017486572, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -152.80596923828125, "logps_train/policy_1_l": -121.3537826538086, "logps_train/policy_1_w": -80.37533569335938, "logps_train/policy_2_2": -119.94975280761719, "logps_train/policy_2_w": -99.9825439453125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 1.8272156715393066, "rewards_train/1-l": -1.466822862625122, "rewards_train/1-w": 2.12262225151062, "rewards_train/2-2": 2.7472119331359863, "rewards_train/2-w": 1.456432819366455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.589445114135742, "rewards_train/margins_1": 0.2954065799713135, "rewards_train/margins_2": 1.2907791137695312, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -161.18310546875, "logps_train/policy_1_l": -203.162841796875, "logps_train/policy_1_w": -146.90325927734375, "logps_train/policy_2_2": -128.75401306152344, "logps_train/policy_2_w": -186.136962890625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.3520023822784424, "rewards_train/1-l": -2.3030028343200684, "rewards_train/1-w": 2.8721725940704346, "rewards_train/2-2": 2.2449116706848145, "rewards_train/2-w": 1.8050543069839478, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.175175428390503, "rewards_train/margins_1": 1.5201702117919922, "rewards_train/margins_2": 0.4398573637008667, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -162.12042236328125, "logps_train/policy_1_l": -179.93577575683594, "logps_train/policy_1_w": -124.80006408691406, "logps_train/policy_2_2": -137.92825317382812, "logps_train/policy_2_w": -147.32427978515625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.746747374534607, "rewards_train/1-l": -1.804905652999878, "rewards_train/1-w": 2.3227274417877197, "rewards_train/2-2": 2.5806117057800293, "rewards_train/2-w": 1.5949161052703857, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.127633094787598, "rewards_train/margins_1": 0.5759800672531128, "rewards_train/margins_2": 0.9856956005096436, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -110.6202163696289, "logps_train/policy_1_l": -88.31041717529297, "logps_train/policy_1_w": -81.83634948730469, "logps_train/policy_2_2": -86.16044616699219, "logps_train/policy_2_w": -105.83094787597656, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.8032124042510986, "rewards_train/1-l": -1.1808464527130127, "rewards_train/1-w": 1.8202717304229736, "rewards_train/2-2": 1.6374707221984863, "rewards_train/2-w": 1.393467664718628, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.0011181831359863, "rewards_train/margins_1": 1.017059326171875, "rewards_train/margins_2": 0.2440030574798584, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -252.55697631835938, "logps_train/policy_1_l": -201.22694396972656, "logps_train/policy_1_w": -192.20266723632812, "logps_train/policy_2_2": -211.8603973388672, "logps_train/policy_2_w": -231.39410400390625, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.7333643436431885, "rewards_train/1-l": -2.3801145553588867, "rewards_train/1-w": 3.815671920776367, "rewards_train/2-2": 3.477241039276123, "rewards_train/2-w": 2.5176196098327637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.195786476135254, "rewards_train/margins_1": 2.0823075771331787, "rewards_train/margins_2": 0.9596214294433594, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -167.9508514404297, "logps_train/policy_1_l": -191.94798278808594, "logps_train/policy_1_w": -184.31675720214844, "logps_train/policy_2_2": -133.80160522460938, "logps_train/policy_2_w": -230.040283203125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.483821153640747, "rewards_train/1-l": -1.2184312343597412, "rewards_train/1-w": 2.7917613983154297, "rewards_train/2-2": 2.162026882171631, "rewards_train/2-w": 1.9983152151107788, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.010192632675171, "rewards_train/margins_1": 1.3079402446746826, "rewards_train/margins_2": 0.16371166706085205, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -124.74000549316406, "logps_train/policy_1_l": -148.730224609375, "logps_train/policy_1_w": -115.50764465332031, "logps_train/policy_2_2": -108.35002136230469, "logps_train/policy_2_w": -139.05682373046875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.7978753447532654, "rewards_train/1-l": -1.1355239152908325, "rewards_train/1-w": 1.3445477485656738, "rewards_train/2-2": 1.3579661846160889, "rewards_train/2-w": 0.9568171501159668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4800716638565063, "rewards_train/margins_1": 0.5466724038124084, "rewards_train/margins_2": 0.40114903450012207, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -183.64974975585938, "logps_train/policy_1_l": -248.70217895507812, "logps_train/policy_1_w": -172.3693389892578, "logps_train/policy_2_2": -145.86114501953125, "logps_train/policy_2_w": -216.12094116210938, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.6156896352767944, "rewards_train/1-l": -1.729591727256775, "rewards_train/1-w": 1.9068166017532349, "rewards_train/2-2": 2.5595879554748535, "rewards_train/2-w": 0.5629059076309204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6364083290100098, "rewards_train/margins_1": 0.29112696647644043, "rewards_train/margins_2": 1.996682047843933, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -114.85813903808594, "logps_train/policy_1_l": -94.32759857177734, "logps_train/policy_1_w": -88.71450805664062, "logps_train/policy_2_2": -101.27515411376953, "logps_train/policy_2_w": -110.53731536865234, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.5173113346099854, "rewards_train/1-l": -0.3118610978126526, "rewards_train/1-w": 1.9703460931777954, "rewards_train/2-2": 1.7834222316741943, "rewards_train/2-w": 1.272049903869629, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.282207190990448, "rewards_train/margins_1": 0.45303475856781006, "rewards_train/margins_2": 0.5113723278045654, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -162.76119995117188, "logps_train/policy_1_l": -187.25729370117188, "logps_train/policy_1_w": -132.49009704589844, "logps_train/policy_2_2": -132.79867553710938, "logps_train/policy_2_w": -166.7071990966797, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.5371603965759277, "rewards_train/1-l": -1.6604949235916138, "rewards_train/1-w": 2.2838032245635986, "rewards_train/2-2": 1.8084146976470947, "rewards_train/2-w": 1.233967661857605, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9442981481552124, "rewards_train/margins_1": 1.746642827987671, "rewards_train/margins_2": 0.5744470357894897, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -160.50363159179688, "logps_train/policy_1_l": -151.08566284179688, "logps_train/policy_1_w": -135.73440551757812, "logps_train/policy_2_2": -133.52639770507812, "logps_train/policy_2_w": -151.06353759765625, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 0.762136697769165, "rewards_train/1-l": -1.5152069330215454, "rewards_train/1-w": 2.1640591621398926, "rewards_train/2-2": 1.637204885482788, "rewards_train/2-w": 1.5905225276947021, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.679266095161438, "rewards_train/margins_1": 1.4019224643707275, "rewards_train/margins_2": 0.04668235778808594, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -132.79791259765625, "logps_train/policy_1_l": -177.56243896484375, "logps_train/policy_1_w": -78.17488861083984, "logps_train/policy_2_2": -114.68233489990234, "logps_train/policy_2_w": -92.89192199707031, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": 1.5127860307693481, "rewards_train/1-l": -1.4606387615203857, "rewards_train/1-w": 1.780167579650879, "rewards_train/2-2": 2.13996958732605, "rewards_train/2-w": 1.5639331340789795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2408063411712646, "rewards_train/margins_1": 0.26738154888153076, "rewards_train/margins_2": 0.5760364532470703, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -133.6653594970703, "logps_train/policy_1_l": -156.2486572265625, "logps_train/policy_1_w": -114.02208709716797, "logps_train/policy_2_2": -102.88732147216797, "logps_train/policy_2_w": -153.1082763671875, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.3279945850372314, "rewards_train/1-l": -1.734631061553955, "rewards_train/1-w": 2.51029109954834, "rewards_train/2-2": 2.26322078704834, "rewards_train/2-w": 1.254797101020813, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.244922161102295, "rewards_train/margins_1": 1.1822965145111084, "rewards_train/margins_2": 1.0084236860275269, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -206.7272491455078, "logps_train/policy_1_l": -187.95849609375, "logps_train/policy_1_w": -143.3509521484375, "logps_train/policy_2_2": -165.49795532226562, "logps_train/policy_2_w": -188.9705047607422, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.6405572891235352, "rewards_train/1-l": -1.5505378246307373, "rewards_train/1-w": 2.533656120300293, "rewards_train/2-2": 2.908601760864258, "rewards_train/2-w": 1.3966997861862183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.08419394493103, "rewards_train/margins_1": 0.8930988311767578, "rewards_train/margins_2": 1.5119019746780396, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -120.8407974243164, "logps_train/policy_1_l": -181.8397216796875, "logps_train/policy_1_w": -152.70510864257812, "logps_train/policy_2_2": -87.07206726074219, "logps_train/policy_2_w": -199.5988311767578, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.6463889479637146, "rewards_train/1-l": -2.38226318359375, "rewards_train/1-w": 2.3201138973236084, "rewards_train/2-2": 1.7461134195327759, "rewards_train/2-w": 1.1037890911102295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.702377080917358, "rewards_train/margins_1": 1.6737249493598938, "rewards_train/margins_2": 0.6423243284225464, "step": 257 }, { "epoch": 0.77, "learning_rate": 3.6195449429751585e-06, "loss": 0.7309, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -215.56829833984375, "logps_train/policy_1_l": -184.2078094482422, "logps_train/policy_1_w": -154.07266235351562, "logps_train/policy_2_2": -166.75213623046875, "logps_train/policy_2_w": -196.58975219726562, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.4931706190109253, "rewards_train/1-l": -1.314530372619629, "rewards_train/1-w": 2.7614848613739014, "rewards_train/2-2": 2.787285804748535, "rewards_train/2-w": 1.6097739934921265, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.07601523399353, "rewards_train/margins_1": 1.268314242362976, "rewards_train/margins_2": 1.1775118112564087, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -175.22665405273438, "logps_train/policy_1_l": -137.04461669921875, "logps_train/policy_1_w": -113.99673461914062, "logps_train/policy_2_2": -131.51266479492188, "logps_train/policy_2_w": -151.29629516601562, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.6945226192474365, "rewards_train/1-l": -1.7560244798660278, "rewards_train/1-w": 2.4245452880859375, "rewards_train/2-2": 3.0604519844055176, "rewards_train/2-w": 1.0992759466171265, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.180569767951965, "rewards_train/margins_1": 0.730022668838501, "rewards_train/margins_2": 1.9611760377883911, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -120.00138854980469, "logps_train/policy_1_l": -211.60406494140625, "logps_train/policy_1_w": -140.4609832763672, "logps_train/policy_2_2": -101.58857727050781, "logps_train/policy_2_w": -194.80307006835938, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.1654868125915527, "rewards_train/1-l": -1.9361871480941772, "rewards_train/1-w": 3.2898385524749756, "rewards_train/2-2": 1.6286423206329346, "rewards_train/2-w": 1.4243805408477783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.226025700569153, "rewards_train/margins_1": 2.124351739883423, "rewards_train/margins_2": 0.20426177978515625, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -192.72779846191406, "logps_train/policy_1_l": -102.24114990234375, "logps_train/policy_1_w": -137.09054565429688, "logps_train/policy_2_2": -142.40087890625, "logps_train/policy_2_w": -166.87832641601562, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 0.8615950345993042, "rewards_train/1-l": -1.363178014755249, "rewards_train/1-w": 2.996412754058838, "rewards_train/2-2": 2.67124080657959, "rewards_train/2-w": 1.821541666984558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.359590768814087, "rewards_train/margins_1": 2.1348177194595337, "rewards_train/margins_2": 0.8496991395950317, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -202.41140747070312, "logps_train/policy_1_l": -145.67861938476562, "logps_train/policy_1_w": -160.24966430664062, "logps_train/policy_2_2": -159.1694793701172, "logps_train/policy_2_w": -209.76739501953125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.17995285987854, "rewards_train/1-l": -1.3181548118591309, "rewards_train/1-w": 2.801595687866211, "rewards_train/2-2": 2.704146146774292, "rewards_train/2-w": 0.9373220205307007, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.119750499725342, "rewards_train/margins_1": 1.621642827987671, "rewards_train/margins_2": 1.7668241262435913, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -153.3499298095703, "logps_train/policy_1_l": -86.86103820800781, "logps_train/policy_1_w": -63.51189422607422, "logps_train/policy_2_2": -120.75697326660156, "logps_train/policy_2_w": -94.28947448730469, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.0571942329406738, "rewards_train/1-l": -0.1536819189786911, "rewards_train/1-w": 0.933576226234436, "rewards_train/2-2": 2.3012561798095703, "rewards_train/2-w": 0.04995875060558319, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.0872581452131271, "rewards_train/margins_1": -0.12361800670623779, "rewards_train/margins_2": 2.251297429203987, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -92.89065551757812, "logps_train/policy_1_l": -120.61286926269531, "logps_train/policy_1_w": -92.25885009765625, "logps_train/policy_2_2": -72.08060455322266, "logps_train/policy_2_w": -121.57759094238281, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.7949193716049194, "rewards_train/1-l": -0.48277151584625244, "rewards_train/1-w": 1.9397403001785278, "rewards_train/2-2": 2.3009238243103027, "rewards_train/2-w": 0.982865571975708, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.4225118160247803, "rewards_train/margins_1": 0.1448209285736084, "rewards_train/margins_2": 1.3180582523345947, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -85.87631225585938, "logps_train/policy_1_l": -124.4637451171875, "logps_train/policy_1_w": -92.19303131103516, "logps_train/policy_2_2": -72.57859802246094, "logps_train/policy_2_w": -110.34220886230469, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.0811192989349365, "rewards_train/1-l": -1.0393427610397339, "rewards_train/1-w": 1.5455408096313477, "rewards_train/2-2": 1.2788584232330322, "rewards_train/2-w": 1.1450759172439575, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5848835706710815, "rewards_train/margins_1": 0.46442151069641113, "rewards_train/margins_2": 0.1337825059890747, "step": 258 }, { "epoch": 0.78, "logps_train/policy_1_2": -103.0540771484375, "logps_train/policy_1_l": -106.48835754394531, "logps_train/policy_1_w": -105.63298797607422, "logps_train/policy_2_2": -90.89341735839844, "logps_train/policy_2_w": -127.02618408203125, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1949832439422607, "rewards_train/1-l": -0.9990308880805969, "rewards_train/1-w": 2.1913888454437256, "rewards_train/2-2": 1.4547988176345825, "rewards_train/2-w": 1.3598816394805908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1904197335243225, "rewards_train/margins_1": 0.9964056015014648, "rewards_train/margins_2": 0.0949171781539917, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -200.86166381835938, "logps_train/policy_1_l": -158.15322875976562, "logps_train/policy_1_w": -126.56746673583984, "logps_train/policy_2_2": -165.4481964111328, "logps_train/policy_2_w": -145.34207153320312, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.9325828552246094, "rewards_train/1-l": -1.0442285537719727, "rewards_train/1-w": 2.101456880569458, "rewards_train/2-2": 3.2989308834075928, "rewards_train/2-w": 1.524385929107666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1456854343414307, "rewards_train/margins_1": 0.16887402534484863, "rewards_train/margins_2": 1.7745449542999268, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -222.97801208496094, "logps_train/policy_1_l": -240.05897521972656, "logps_train/policy_1_w": -178.67401123046875, "logps_train/policy_2_2": -188.70135498046875, "logps_train/policy_2_w": -227.70230102539062, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 1.7732913494110107, "rewards_train/1-l": -2.788709878921509, "rewards_train/1-w": 2.701347827911377, "rewards_train/2-2": 2.894707202911377, "rewards_train/2-w": 1.3235198259353638, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.490057706832886, "rewards_train/margins_1": 0.9280564785003662, "rewards_train/margins_2": 1.5711873769760132, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -241.99716186523438, "logps_train/policy_1_l": -205.58230590820312, "logps_train/policy_1_w": -157.08026123046875, "logps_train/policy_2_2": -200.35401916503906, "logps_train/policy_2_w": -187.43399047851562, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.5440325736999512, "rewards_train/1-l": -2.108229875564575, "rewards_train/1-w": 3.8857223987579346, "rewards_train/2-2": 3.1739718914031982, "rewards_train/2-w": 3.1378512382507324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.99395227432251, "rewards_train/margins_1": 2.3416898250579834, "rewards_train/margins_2": 0.03612065315246582, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -243.35983276367188, "logps_train/policy_1_l": -219.22976684570312, "logps_train/policy_1_w": -159.20767211914062, "logps_train/policy_2_2": -198.69329833984375, "logps_train/policy_2_w": -199.5494384765625, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -239.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.6890172958374023, "rewards_train/1-l": -1.7073516845703125, "rewards_train/1-w": 2.826107978820801, "rewards_train/2-2": 4.055670261383057, "rewards_train/2-w": 1.801306962966919, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.533459663391113, "rewards_train/margins_1": 0.13709068298339844, "rewards_train/margins_2": 2.2543632984161377, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -134.3718719482422, "logps_train/policy_1_l": -89.2840347290039, "logps_train/policy_1_w": -88.9012222290039, "logps_train/policy_2_2": -112.80353546142578, "logps_train/policy_2_w": -111.5139389038086, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.46046972274780273, "rewards_train/1-l": -0.9132671356201172, "rewards_train/1-w": 2.17315936088562, "rewards_train/2-2": 1.2883968353271484, "rewards_train/2-w": 1.5259499549865723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0864264965057373, "rewards_train/margins_1": 1.7126896381378174, "rewards_train/margins_2": -0.23755311965942383, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -140.2447509765625, "logps_train/policy_1_l": -133.19314575195312, "logps_train/policy_1_w": -136.6991424560547, "logps_train/policy_2_2": -116.9353256225586, "logps_train/policy_2_w": -166.95668029785156, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.200523853302002, "rewards_train/1-l": -1.3740017414093018, "rewards_train/1-w": 2.101180076599121, "rewards_train/2-2": 2.1252176761627197, "rewards_train/2-w": 0.8394871950149536, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.475181818008423, "rewards_train/margins_1": 0.9006562232971191, "rewards_train/margins_2": 1.2857304811477661, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -222.67059326171875, "logps_train/policy_1_l": -171.23828125, "logps_train/policy_1_w": -131.15975952148438, "logps_train/policy_2_2": -164.012939453125, "logps_train/policy_2_w": -176.9727783203125, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.9079406261444092, "rewards_train/1-l": -1.186328649520874, "rewards_train/1-w": 2.8543365001678467, "rewards_train/2-2": 2.8518316745758057, "rewards_train/2-w": 1.5480355024337769, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.040665149688721, "rewards_train/margins_1": 1.9463958740234375, "rewards_train/margins_2": 1.3037961721420288, "step": 259 }, { "epoch": 0.78, "learning_rate": 3.5974075268532354e-06, "loss": 0.7252, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -242.51759338378906, "logps_train/policy_1_l": -231.5281982421875, "logps_train/policy_1_w": -143.96694946289062, "logps_train/policy_2_2": -198.34463500976562, "logps_train/policy_2_w": -183.16958618164062, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.3107407093048096, "rewards_train/1-l": -1.6528197526931763, "rewards_train/1-w": 2.3626794815063477, "rewards_train/2-2": 3.778036594390869, "rewards_train/2-w": 1.4892926216125488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.015499234199524, "rewards_train/margins_1": 0.051938772201538086, "rewards_train/margins_2": 2.2887439727783203, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -142.55029296875, "logps_train/policy_1_l": -194.46493530273438, "logps_train/policy_1_w": -113.39456176757812, "logps_train/policy_2_2": -115.42072296142578, "logps_train/policy_2_w": -149.70755004882812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.8934074640274048, "rewards_train/1-l": -1.500887393951416, "rewards_train/1-w": 2.597165584564209, "rewards_train/2-2": 2.5273611545562744, "rewards_train/2-w": 1.771040678024292, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.098052978515625, "rewards_train/margins_1": 0.7037581205368042, "rewards_train/margins_2": 0.7563204765319824, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -191.01943969726562, "logps_train/policy_1_l": -243.4564666748047, "logps_train/policy_1_w": -153.587646484375, "logps_train/policy_2_2": -151.77037048339844, "logps_train/policy_2_w": -186.71006774902344, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8527437448501587, "rewards_train/1-l": -2.203458786010742, "rewards_train/1-w": 3.1892833709716797, "rewards_train/2-2": 3.1237447261810303, "rewards_train/2-w": 2.1286027431488037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.392742156982422, "rewards_train/margins_1": 1.336539626121521, "rewards_train/margins_2": 0.9951419830322266, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -120.79625701904297, "logps_train/policy_1_l": -148.132568359375, "logps_train/policy_1_w": -142.62644958496094, "logps_train/policy_2_2": -90.52824401855469, "logps_train/policy_2_w": -184.77655029296875, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.595374345779419, "rewards_train/1-l": -0.9590581655502319, "rewards_train/1-w": 2.717042922973633, "rewards_train/2-2": 2.359675645828247, "rewards_train/2-w": 1.3629696369171143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6761010885238647, "rewards_train/margins_1": 1.1216685771942139, "rewards_train/margins_2": 0.9967060089111328, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -179.21954345703125, "logps_train/policy_1_l": -177.26783752441406, "logps_train/policy_1_w": -133.94366455078125, "logps_train/policy_2_2": -148.93386840820312, "logps_train/policy_2_w": -188.47689819335938, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.1376168727874756, "rewards_train/1-l": -0.8681910634040833, "rewards_train/1-w": 3.045867681503296, "rewards_train/2-2": 1.9589574337005615, "rewards_train/2-w": 1.704458236694336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.914058744907379, "rewards_train/margins_1": 1.9082508087158203, "rewards_train/margins_2": 0.2544991970062256, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -151.28167724609375, "logps_train/policy_1_l": -204.50640869140625, "logps_train/policy_1_w": -139.94895935058594, "logps_train/policy_2_2": -128.025146484375, "logps_train/policy_2_w": -163.17037963867188, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9562084674835205, "rewards_train/1-l": -2.1846251487731934, "rewards_train/1-w": 2.666041612625122, "rewards_train/2-2": 2.448265552520752, "rewards_train/2-w": 1.847414255142212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.850666761398315, "rewards_train/margins_1": 0.7098331451416016, "rewards_train/margins_2": 0.60085129737854, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -124.45735168457031, "logps_train/policy_1_l": -61.51767349243164, "logps_train/policy_1_w": -72.1030044555664, "logps_train/policy_2_2": -110.53773498535156, "logps_train/policy_2_w": -86.48600769042969, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": 1.2120771408081055, "rewards_train/1-l": -0.7199316024780273, "rewards_train/1-w": 1.0502468347549438, "rewards_train/2-2": 1.9337272644042969, "rewards_train/2-w": 0.7701486945152283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7701784372329712, "rewards_train/margins_1": -0.16183030605316162, "rewards_train/margins_2": 1.1635785698890686, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -161.90509033203125, "logps_train/policy_1_l": -162.32363891601562, "logps_train/policy_1_w": -104.6873779296875, "logps_train/policy_2_2": -127.92961120605469, "logps_train/policy_2_w": -128.06954956054688, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.1532413959503174, "rewards_train/1-l": -1.2065829038619995, "rewards_train/1-w": 2.03751277923584, "rewards_train/2-2": 2.1820387840270996, "rewards_train/2-w": 1.2446074485778809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2440956830978394, "rewards_train/margins_1": 0.8842713832855225, "rewards_train/margins_2": 0.9374313354492188, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -113.3872299194336, "logps_train/policy_1_l": -128.22120666503906, "logps_train/policy_1_w": -75.77058410644531, "logps_train/policy_2_2": -81.85623931884766, "logps_train/policy_2_w": -107.607666015625, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.0948705673217773, "rewards_train/1-l": -1.54594886302948, "rewards_train/1-w": 1.9952073097229004, "rewards_train/2-2": 1.8784387111663818, "rewards_train/2-w": 1.4681396484375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5411561727523804, "rewards_train/margins_1": 0.900336742401123, "rewards_train/margins_2": 0.41029906272888184, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -125.75271606445312, "logps_train/policy_1_l": -122.08531188964844, "logps_train/policy_1_w": -128.9609832763672, "logps_train/policy_2_2": -105.371826171875, "logps_train/policy_2_w": -155.35769653320312, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.2841031551361084, "rewards_train/1-l": -0.9944694638252258, "rewards_train/1-w": 2.161714553833008, "rewards_train/2-2": 1.909692406654358, "rewards_train/2-w": 1.2962613105773926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1561840176582336, "rewards_train/margins_1": 0.8776113986968994, "rewards_train/margins_2": 0.6134310960769653, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -143.71658325195312, "logps_train/policy_1_l": -129.63070678710938, "logps_train/policy_1_w": -111.42704772949219, "logps_train/policy_2_2": -121.04976654052734, "logps_train/policy_2_w": -134.5832061767578, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.1002154350280762, "rewards_train/1-l": -1.1349461078643799, "rewards_train/1-w": 2.591670513153076, "rewards_train/2-2": 1.6376018524169922, "rewards_train/2-w": 1.801835298538208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.726616621017456, "rewards_train/margins_1": 1.491455078125, "rewards_train/margins_2": -0.16423344612121582, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -88.82218933105469, "logps_train/policy_1_l": -49.263824462890625, "logps_train/policy_1_w": -56.86833953857422, "logps_train/policy_2_2": -63.86708068847656, "logps_train/policy_2_w": -72.69852447509766, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -45.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 0.7132888436317444, "rewards_train/1-l": -0.36173373460769653, "rewards_train/1-w": 1.7000799179077148, "rewards_train/2-2": 1.3976666927337646, "rewards_train/2-w": 1.223116159439087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.0618136525154114, "rewards_train/margins_1": 0.9867910742759705, "rewards_train/margins_2": 0.17455053329467773, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -123.81770324707031, "logps_train/policy_1_l": -145.81101989746094, "logps_train/policy_1_w": -123.71808624267578, "logps_train/policy_2_2": -97.99725341796875, "logps_train/policy_2_w": -159.84329223632812, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.9049490690231323, "rewards_train/1-l": -1.6877427101135254, "rewards_train/1-w": 2.7703795433044434, "rewards_train/2-2": 1.5658998489379883, "rewards_train/2-w": 1.3969216346740723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.458122253417969, "rewards_train/margins_1": 1.865430474281311, "rewards_train/margins_2": 0.16897821426391602, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -254.73910522460938, "logps_train/policy_1_l": -185.53466796875, "logps_train/policy_1_w": -134.066162109375, "logps_train/policy_2_2": -193.4945831298828, "logps_train/policy_2_w": -176.025146484375, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.459291934967041, "rewards_train/1-l": -2.349560260772705, "rewards_train/1-w": 2.5156495571136475, "rewards_train/2-2": 3.7710490226745605, "rewards_train/2-w": 1.2375237941741943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.8652098178863525, "rewards_train/margins_1": 1.0563576221466064, "rewards_train/margins_2": 2.533525228500366, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -78.96710205078125, "logps_train/policy_1_l": -97.81072235107422, "logps_train/policy_1_w": -73.74906921386719, "logps_train/policy_2_2": -63.439903259277344, "logps_train/policy_2_w": -92.85310363769531, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.2501643896102905, "rewards_train/1-l": -1.4616632461547852, "rewards_train/1-w": 1.9461870193481445, "rewards_train/2-2": 1.4028847217559814, "rewards_train/2-w": 1.4006268978118896, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4078502655029297, "rewards_train/margins_1": 0.696022629737854, "rewards_train/margins_2": 0.002257823944091797, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -229.78579711914062, "logps_train/policy_1_l": -312.9287109375, "logps_train/policy_1_w": -184.86724853515625, "logps_train/policy_2_2": -185.2297821044922, "logps_train/policy_2_w": -227.2154541015625, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.5511075258255005, "rewards_train/1-l": -3.83388614654541, "rewards_train/1-w": 2.4460883140563965, "rewards_train/2-2": 2.933272361755371, "rewards_train/2-w": 1.4253300428390503, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.279974460601807, "rewards_train/margins_1": 0.894980788230896, "rewards_train/margins_2": 1.5079423189163208, "step": 261 }, { "epoch": 0.78, "learning_rate": 3.5751630056913017e-06, "loss": 0.7178, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -142.82705688476562, "logps_train/policy_1_l": -130.39102172851562, "logps_train/policy_1_w": -114.9177017211914, "logps_train/policy_2_2": -103.21986389160156, "logps_train/policy_2_w": -157.4300079345703, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.3282313346862793, "rewards_train/1-l": -1.7187896966934204, "rewards_train/1-w": 2.708620548248291, "rewards_train/2-2": 2.1889514923095703, "rewards_train/2-w": 1.364421010017395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.427410244941711, "rewards_train/margins_1": 1.3803892135620117, "rewards_train/margins_2": 0.8245304822921753, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -68.60245513916016, "logps_train/policy_1_l": -113.55415344238281, "logps_train/policy_1_w": -63.46165084838867, "logps_train/policy_2_2": -55.55031967163086, "logps_train/policy_2_w": -82.99363708496094, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.6155354976654053, "rewards_train/1-l": -1.2183551788330078, "rewards_train/1-w": 1.0678974390029907, "rewards_train/2-2": 0.9812964200973511, "rewards_train/2-w": 0.5350106954574585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.2862526178359985, "rewards_train/margins_1": 0.45236194133758545, "rewards_train/margins_2": 0.4462857246398926, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -127.597412109375, "logps_train/policy_1_l": -172.82745361328125, "logps_train/policy_1_w": -119.8897705078125, "logps_train/policy_2_2": -99.42228698730469, "logps_train/policy_2_w": -154.3475799560547, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.498071312904358, "rewards_train/1-l": -1.8300100564956665, "rewards_train/1-w": 1.9539912939071655, "rewards_train/2-2": 1.9480057954788208, "rewards_train/2-w": 0.7144608497619629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.784001350402832, "rewards_train/margins_1": 0.4559199810028076, "rewards_train/margins_2": 1.233544945716858, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -147.66729736328125, "logps_train/policy_1_l": -148.10934448242188, "logps_train/policy_1_w": -79.37384033203125, "logps_train/policy_2_2": -121.14352416992188, "logps_train/policy_2_w": -105.8048324584961, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.9317081570625305, "rewards_train/1-l": -1.9695277214050293, "rewards_train/1-w": 1.8161323070526123, "rewards_train/2-2": 1.8168977499008179, "rewards_train/2-w": 1.1960790157318115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7856600284576416, "rewards_train/margins_1": 0.8844241499900818, "rewards_train/margins_2": 0.6208187341690063, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -78.6925048828125, "logps_train/policy_1_l": -39.62922668457031, "logps_train/policy_1_w": -64.91244506835938, "logps_train/policy_2_2": -59.54244613647461, "logps_train/policy_2_w": -75.67393493652344, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -34.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 1.0002812147140503, "rewards_train/1-l": -0.5597975850105286, "rewards_train/1-w": 1.7365872859954834, "rewards_train/2-2": 1.6965365409851074, "rewards_train/2-w": 1.1259658336639404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.296384871006012, "rewards_train/margins_1": 0.7363060712814331, "rewards_train/margins_2": 0.570570707321167, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -78.83848571777344, "logps_train/policy_1_l": -70.85574340820312, "logps_train/policy_1_w": -52.99314880371094, "logps_train/policy_2_2": -61.20311737060547, "logps_train/policy_2_w": -69.53828430175781, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -62.75, "logps_train/ref_1_w": -65.5, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -76.0, "rewards_train/1-2": 0.9620492458343506, "rewards_train/1-l": -0.8051050901412964, "rewards_train/1-w": 1.2229509353637695, "rewards_train/2-2": 1.4136724472045898, "rewards_train/2-w": 0.6606248021125793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.028056025505066, "rewards_train/margins_1": 0.26090168952941895, "rewards_train/margins_2": 0.7530476450920105, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -143.79127502441406, "logps_train/policy_1_l": -136.43856811523438, "logps_train/policy_1_w": -99.60087585449219, "logps_train/policy_2_2": -118.64461517333984, "logps_train/policy_2_w": -120.13679504394531, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.5599355697631836, "rewards_train/1-l": -1.6698346138000488, "rewards_train/1-w": 3.0434279441833496, "rewards_train/2-2": 2.3113203048706055, "rewards_train/2-w": 2.4895429611206055, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.713262557983398, "rewards_train/margins_1": 1.483492374420166, "rewards_train/margins_2": -0.17822265625, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -152.50650024414062, "logps_train/policy_1_l": -158.22012329101562, "logps_train/policy_1_w": -97.35924530029297, "logps_train/policy_2_2": -123.9103012084961, "logps_train/policy_2_w": -125.21705627441406, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.6649764776229858, "rewards_train/1-l": -1.078262209892273, "rewards_train/1-w": 1.695325493812561, "rewards_train/2-2": 2.4105324745178223, "rewards_train/2-w": 1.2103252410888672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.773587703704834, "rewards_train/margins_1": 0.030349016189575195, "rewards_train/margins_2": 1.200207233428955, "step": 262 }, { "epoch": 0.79, "logps_train/policy_1_2": -164.61801147460938, "logps_train/policy_1_l": -185.07037353515625, "logps_train/policy_1_w": -177.1643829345703, "logps_train/policy_2_2": -136.68435668945312, "logps_train/policy_2_w": -230.4468536376953, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 1.9163249731063843, "rewards_train/1-l": -1.7519605159759521, "rewards_train/1-w": 3.086686134338379, "rewards_train/2-2": 2.3784379959106445, "rewards_train/2-w": 1.699064016342163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.838646650314331, "rewards_train/margins_1": 1.1703611612319946, "rewards_train/margins_2": 0.6793739795684814, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -91.6795425415039, "logps_train/policy_1_l": -105.10565185546875, "logps_train/policy_1_w": -106.59603881835938, "logps_train/policy_2_2": -73.79646301269531, "logps_train/policy_2_w": -129.86585998535156, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.0076565742492676, "rewards_train/1-l": -1.0928651094436646, "rewards_train/1-w": 1.6403965950012207, "rewards_train/2-2": 1.4541428089141846, "rewards_train/2-w": 1.0102887153625488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7332617044448853, "rewards_train/margins_1": 0.6327400207519531, "rewards_train/margins_2": 0.44385409355163574, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -138.43557739257812, "logps_train/policy_1_l": -131.39300537109375, "logps_train/policy_1_w": -163.70864868164062, "logps_train/policy_2_2": -98.51826477050781, "logps_train/policy_2_w": -207.14224243164062, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.2279267311096191, "rewards_train/1-l": -0.5369570851325989, "rewards_train/1-w": 2.194758892059326, "rewards_train/2-2": 1.711064338684082, "rewards_train/2-w": 1.038900375366211, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.731715977191925, "rewards_train/margins_1": 0.966832160949707, "rewards_train/margins_2": 0.6721639633178711, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -130.15296936035156, "logps_train/policy_1_l": -130.7759246826172, "logps_train/policy_1_w": -83.32953643798828, "logps_train/policy_2_2": -101.90861511230469, "logps_train/policy_2_w": -106.75419616699219, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.3534538745880127, "rewards_train/1-l": -1.76772940158844, "rewards_train/1-w": 2.2717342376708984, "rewards_train/2-2": 1.8122625350952148, "rewards_train/2-w": 1.516767978668213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.039463639259338, "rewards_train/margins_1": 0.9182803630828857, "rewards_train/margins_2": 0.29549455642700195, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -160.40185546875, "logps_train/policy_1_l": -161.91558837890625, "logps_train/policy_1_w": -137.37632751464844, "logps_train/policy_2_2": -129.05857849121094, "logps_train/policy_2_w": -190.50888061523438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.3488776683807373, "rewards_train/1-l": -0.3569892644882202, "rewards_train/1-w": 2.6201796531677246, "rewards_train/2-2": 2.197267532348633, "rewards_train/2-w": 1.3022360801696777, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.977168917655945, "rewards_train/margins_1": 1.2713019847869873, "rewards_train/margins_2": 0.8950314521789551, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -101.4644546508789, "logps_train/policy_1_l": -136.55990600585938, "logps_train/policy_1_w": -90.51042175292969, "logps_train/policy_2_2": -90.63919067382812, "logps_train/policy_2_w": -111.83820343017578, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.402773380279541, "rewards_train/1-l": -1.4505218267440796, "rewards_train/1-w": 1.5052076578140259, "rewards_train/2-2": 1.9111783504486084, "rewards_train/2-w": 0.9685232043266296, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9557294845581055, "rewards_train/margins_1": 0.10243427753448486, "rewards_train/margins_2": 0.9426551461219788, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -199.775390625, "logps_train/policy_1_l": -192.6948699951172, "logps_train/policy_1_w": -130.211669921875, "logps_train/policy_2_2": -146.323486328125, "logps_train/policy_2_w": -188.49729919433594, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 0.6990244388580322, "rewards_train/1-l": -2.3812060356140137, "rewards_train/1-w": 3.0944571495056152, "rewards_train/2-2": 2.1028075218200684, "rewards_train/2-w": 1.4721450805664062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.475663185119629, "rewards_train/margins_1": 2.395432710647583, "rewards_train/margins_2": 0.6306624412536621, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -198.27908325195312, "logps_train/policy_1_l": -194.2952880859375, "logps_train/policy_1_w": -143.501953125, "logps_train/policy_2_2": -158.3604736328125, "logps_train/policy_2_w": -192.43641662597656, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.884591817855835, "rewards_train/1-l": -1.9404659271240234, "rewards_train/1-w": 3.5810546875, "rewards_train/2-2": 3.6483278274536133, "rewards_train/2-w": 2.1235456466674805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.521520614624023, "rewards_train/margins_1": 1.696462869644165, "rewards_train/margins_2": 1.5247821807861328, "step": 263 }, { "epoch": 0.79, "learning_rate": 3.552813550515408e-06, "loss": 0.7662, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -84.38554382324219, "logps_train/policy_1_l": -84.83528137207031, "logps_train/policy_1_w": -89.2389907836914, "logps_train/policy_2_2": -71.93864440917969, "logps_train/policy_2_w": -110.02916717529297, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.7583208084106445, "rewards_train/1-l": -0.7536454200744629, "rewards_train/1-w": 2.0214133262634277, "rewards_train/2-2": 1.0502766370773315, "rewards_train/2-w": 1.4892710447311401, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 2.7750587463378906, "rewards_train/margins_1": 1.2630925178527832, "rewards_train/margins_2": -0.4389944076538086, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -95.2137451171875, "logps_train/policy_1_l": -208.37770080566406, "logps_train/policy_1_w": -112.23330688476562, "logps_train/policy_2_2": -80.81262969970703, "logps_train/policy_2_w": -140.65328979492188, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.3762811422348022, "rewards_train/1-l": -2.1690196990966797, "rewards_train/1-w": 2.614168643951416, "rewards_train/2-2": 1.671862244606018, "rewards_train/2-w": 1.7690460681915283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.783188343048096, "rewards_train/margins_1": 1.2378875017166138, "rewards_train/margins_2": -0.09718382358551025, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -155.59317016601562, "logps_train/policy_1_l": -79.40375518798828, "logps_train/policy_1_w": -113.84867858886719, "logps_train/policy_2_2": -107.51377868652344, "logps_train/policy_2_w": -159.79112243652344, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.4938078820705414, "rewards_train/1-l": -1.0739694833755493, "rewards_train/1-w": 2.0702104568481445, "rewards_train/2-2": 2.2611217498779297, "rewards_train/2-w": 0.7740130424499512, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.144179940223694, "rewards_train/margins_1": 1.5764025747776031, "rewards_train/margins_2": 1.4871087074279785, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -143.88983154296875, "logps_train/policy_1_l": -140.85470581054688, "logps_train/policy_1_w": -108.52079010009766, "logps_train/policy_2_2": -111.10263061523438, "logps_train/policy_2_w": -140.55496215820312, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.6719541549682617, "rewards_train/1-l": -2.443869113922119, "rewards_train/1-w": 2.3948936462402344, "rewards_train/2-2": 2.4514553546905518, "rewards_train/2-w": 1.4561240673065186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.8387627601623535, "rewards_train/margins_1": 0.7229394912719727, "rewards_train/margins_2": 0.9953312873840332, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -128.24166870117188, "logps_train/policy_1_l": -106.72846984863281, "logps_train/policy_1_w": -123.61754608154297, "logps_train/policy_2_2": -94.2520523071289, "logps_train/policy_2_w": -166.70640563964844, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.5672404766082764, "rewards_train/1-l": -1.014448881149292, "rewards_train/1-w": 2.2568979263305664, "rewards_train/2-2": 1.6802632808685303, "rewards_train/2-w": 1.0805306434631348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2713468074798584, "rewards_train/margins_1": 1.68965744972229, "rewards_train/margins_2": 0.5997326374053955, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -156.03616333007812, "logps_train/policy_1_l": -171.33303833007812, "logps_train/policy_1_w": -135.53439331054688, "logps_train/policy_2_2": -132.85870361328125, "logps_train/policy_2_w": -171.79421997070312, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.81435227394104, "rewards_train/1-l": -2.487208843231201, "rewards_train/1-w": 2.1996865272521973, "rewards_train/2-2": 2.2758476734161377, "rewards_train/2-w": 0.8119829297065735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.686895370483398, "rewards_train/margins_1": 0.3853342533111572, "rewards_train/margins_2": 1.4638647437095642, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -190.29933166503906, "logps_train/policy_1_l": -197.2908477783203, "logps_train/policy_1_w": -126.4129638671875, "logps_train/policy_2_2": -146.05242919921875, "logps_train/policy_2_w": -168.66075134277344, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.4747552871704102, "rewards_train/1-l": -1.8083820343017578, "rewards_train/1-w": 3.3258914947509766, "rewards_train/2-2": 2.461162805557251, "rewards_train/2-w": 1.875331163406372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.134273529052734, "rewards_train/margins_1": 1.8511362075805664, "rewards_train/margins_2": 0.5858316421508789, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -176.10142517089844, "logps_train/policy_1_l": -100.84773254394531, "logps_train/policy_1_w": -96.01773071289062, "logps_train/policy_2_2": -137.45391845703125, "logps_train/policy_2_w": -129.19924926757812, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.4039206504821777, "rewards_train/1-l": -0.7847727537155151, "rewards_train/1-w": 2.824789524078369, "rewards_train/2-2": 2.7092947959899902, "rewards_train/2-w": 1.8941359519958496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6095622777938843, "rewards_train/margins_1": 1.4208688735961914, "rewards_train/margins_2": 0.8151588439941406, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -129.97186279296875, "logps_train/policy_1_l": -162.47760009765625, "logps_train/policy_1_w": -115.9901351928711, "logps_train/policy_2_2": -100.73774719238281, "logps_train/policy_2_w": -143.1060028076172, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.4059388637542725, "rewards_train/1-l": -1.229010820388794, "rewards_train/1-w": 1.774228572845459, "rewards_train/2-2": 1.782475471496582, "rewards_train/2-w": 0.9745562076568604, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.003239393234253, "rewards_train/margins_1": 0.3682897090911865, "rewards_train/margins_2": 0.8079192638397217, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -115.11905670166016, "logps_train/policy_1_l": -201.7774658203125, "logps_train/policy_1_w": -162.747802734375, "logps_train/policy_2_2": -94.46482849121094, "logps_train/policy_2_w": -192.9296875, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.274031639099121, "rewards_train/1-l": -2.7295033931732178, "rewards_train/1-w": 2.7830312252044678, "rewards_train/2-2": 1.7648444175720215, "rewards_train/2-w": 1.4382827281951904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.5125346183776855, "rewards_train/margins_1": 1.5089995861053467, "rewards_train/margins_2": 0.32656168937683105, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -124.23530578613281, "logps_train/policy_1_l": -181.75106811523438, "logps_train/policy_1_w": -144.07473754882812, "logps_train/policy_2_2": -106.65264892578125, "logps_train/policy_2_w": -171.32839965820312, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.89834463596344, "rewards_train/1-l": -1.818856120109558, "rewards_train/1-w": 2.235494613647461, "rewards_train/2-2": 2.290985584259033, "rewards_train/2-w": 1.490207314491272, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.054350733757019, "rewards_train/margins_1": 0.337149977684021, "rewards_train/margins_2": 0.8007782697677612, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -221.1433868408203, "logps_train/policy_1_l": -230.12796020507812, "logps_train/policy_1_w": -115.52992248535156, "logps_train/policy_2_2": -180.89398193359375, "logps_train/policy_2_w": -146.53363037109375, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.2544125318527222, "rewards_train/1-l": -2.354983329772949, "rewards_train/1-w": 1.8837257623672485, "rewards_train/2-2": 2.6106009483337402, "rewards_train/2-w": 1.3974175453186035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.238709092140198, "rewards_train/margins_1": 0.6293132305145264, "rewards_train/margins_2": 1.2131834030151367, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -258.39263916015625, "logps_train/policy_1_l": -245.8020477294922, "logps_train/policy_1_w": -129.78411865234375, "logps_train/policy_2_2": -221.0111541748047, "logps_train/policy_2_w": -146.384521484375, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.1451106071472168, "rewards_train/1-l": -1.9934861660003662, "rewards_train/1-w": 2.123931884765625, "rewards_train/2-2": 2.389509439468384, "rewards_train/2-w": 1.9607661962509155, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.117418050765991, "rewards_train/margins_1": 0.9788212776184082, "rewards_train/margins_2": 0.42874324321746826, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -142.63507080078125, "logps_train/policy_1_l": -85.39952087402344, "logps_train/policy_1_w": -85.1526107788086, "logps_train/policy_2_2": -108.22151184082031, "logps_train/policy_2_w": -109.66880798339844, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.7802433967590332, "rewards_train/1-l": -0.9742295742034912, "rewards_train/1-w": 1.3956762552261353, "rewards_train/2-2": 1.9731619358062744, "rewards_train/2-w": 0.4499156177043915, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3699058294296265, "rewards_train/margins_1": 0.615432858467102, "rewards_train/margins_2": 1.523246318101883, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -239.38742065429688, "logps_train/policy_1_l": -309.6268310546875, "logps_train/policy_1_w": -221.7662353515625, "logps_train/policy_2_2": -202.9263916015625, "logps_train/policy_2_w": -281.85211181640625, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -312.0, "rewards_train/1-2": 1.37375807762146, "rewards_train/1-l": -3.3533101081848145, "rewards_train/1-w": 4.511853218078613, "rewards_train/2-2": 3.065173625946045, "rewards_train/2-w": 3.078850746154785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.865163326263428, "rewards_train/margins_1": 3.1380951404571533, "rewards_train/margins_2": -0.013677120208740234, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -209.7017364501953, "logps_train/policy_1_l": -222.5789794921875, "logps_train/policy_1_w": -151.86114501953125, "logps_train/policy_2_2": -189.99945068359375, "logps_train/policy_2_w": -180.71072387695312, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.6181087493896484, "rewards_train/1-l": -1.8282115459442139, "rewards_train/1-w": 3.1154468059539795, "rewards_train/2-2": 3.2164597511291504, "rewards_train/2-w": 2.3617396354675293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.943658351898193, "rewards_train/margins_1": 0.49733805656433105, "rewards_train/margins_2": 0.8547201156616211, "step": 265 }, { "epoch": 0.8, "learning_rate": 3.5303613425929805e-06, "loss": 0.6805, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -116.4769058227539, "logps_train/policy_1_l": -163.11415100097656, "logps_train/policy_1_w": -103.40697479248047, "logps_train/policy_2_2": -93.3517074584961, "logps_train/policy_2_w": -128.43048095703125, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.7518207430839539, "rewards_train/1-l": -2.0561413764953613, "rewards_train/1-w": 1.8943613767623901, "rewards_train/2-2": 1.503354787826538, "rewards_train/2-w": 1.1077336072921753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9505027532577515, "rewards_train/margins_1": 1.1425406336784363, "rewards_train/margins_2": 0.3956211805343628, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -123.36515045166016, "logps_train/policy_1_l": -182.91160583496094, "logps_train/policy_1_w": -119.68978881835938, "logps_train/policy_2_2": -96.6197509765625, "logps_train/policy_2_w": -165.8328094482422, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.9384851455688477, "rewards_train/1-l": -1.539207935333252, "rewards_train/1-w": 1.7992833852767944, "rewards_train/2-2": 1.734508991241455, "rewards_train/2-w": 0.15500018000602722, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3384913206100464, "rewards_train/margins_1": 0.8607982397079468, "rewards_train/margins_2": 1.5795088112354279, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -188.93292236328125, "logps_train/policy_1_l": -243.4622039794922, "logps_train/policy_1_w": -144.1197509765625, "logps_train/policy_2_2": -152.88232421875, "logps_train/policy_2_w": -180.58180236816406, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.3020210266113281, "rewards_train/1-l": -2.784501075744629, "rewards_train/1-w": 3.376307249069214, "rewards_train/2-2": 2.4242660999298096, "rewards_train/2-w": 2.2722878456115723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.160808324813843, "rewards_train/margins_1": 2.0742862224578857, "rewards_train/margins_2": 0.1519782543182373, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -79.03111267089844, "logps_train/policy_1_l": -70.14486694335938, "logps_train/policy_1_w": -36.37010192871094, "logps_train/policy_2_2": -58.12232971191406, "logps_train/policy_2_w": -54.084922790527344, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -58.25, "logps_train/ref_1_w": -47.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -58.25, "rewards_train/1-2": 0.6974744200706482, "rewards_train/1-l": -1.1861175298690796, "rewards_train/1-w": 1.103224277496338, "rewards_train/2-2": 1.164036750793457, "rewards_train/2-w": 0.4086952209472656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.2893418073654175, "rewards_train/margins_1": 0.4057498574256897, "rewards_train/margins_2": 0.7553415298461914, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -172.21426391601562, "logps_train/policy_1_l": -239.5244140625, "logps_train/policy_1_w": -187.26480102539062, "logps_train/policy_2_2": -144.42738342285156, "logps_train/policy_2_w": -230.2169647216797, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.229355812072754, "rewards_train/1-l": -2.8633787631988525, "rewards_train/1-w": 3.2305521965026855, "rewards_train/2-2": 3.0150747299194336, "rewards_train/2-w": 1.6884608268737793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.093930959701538, "rewards_train/margins_1": 1.0011963844299316, "rewards_train/margins_2": 1.3266139030456543, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -185.4110107421875, "logps_train/policy_1_l": -127.782958984375, "logps_train/policy_1_w": -117.68746948242188, "logps_train/policy_2_2": -146.74618530273438, "logps_train/policy_2_w": -161.82247924804688, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.2792102098464966, "rewards_train/1-l": -0.909936785697937, "rewards_train/1-w": 2.337502956390381, "rewards_train/2-2": 2.4863195419311523, "rewards_train/2-w": 1.3958783149719238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.247439742088318, "rewards_train/margins_1": 1.0582927465438843, "rewards_train/margins_2": 1.0904412269592285, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -193.8849334716797, "logps_train/policy_1_l": -244.03326416015625, "logps_train/policy_1_w": -151.98965454101562, "logps_train/policy_2_2": -169.837158203125, "logps_train/policy_2_w": -184.87850952148438, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.314632534980774, "rewards_train/1-l": -2.4841859340667725, "rewards_train/1-w": 2.9557225704193115, "rewards_train/2-2": 2.2037835121154785, "rewards_train/2-w": 1.8543357849121094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.439908504486084, "rewards_train/margins_1": 1.6410900354385376, "rewards_train/margins_2": 0.34944772720336914, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -171.8302001953125, "logps_train/policy_1_l": -235.64181518554688, "logps_train/policy_1_w": -146.6320343017578, "logps_train/policy_2_2": -135.86080932617188, "logps_train/policy_2_w": -176.447265625, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.0935418605804443, "rewards_train/1-l": -1.5171111822128296, "rewards_train/1-w": 2.0828897953033447, "rewards_train/2-2": 1.751419186592102, "rewards_train/2-w": 0.8608401417732239, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6000009775161743, "rewards_train/margins_1": 0.9893479347229004, "rewards_train/margins_2": 0.8905790448188782, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -126.93086242675781, "logps_train/policy_1_l": -90.83863830566406, "logps_train/policy_1_w": -101.57185363769531, "logps_train/policy_2_2": -92.71964263916016, "logps_train/policy_2_w": -133.63766479492188, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.6178512573242188, "rewards_train/1-l": -1.0983173847198486, "rewards_train/1-w": 2.8506274223327637, "rewards_train/2-2": 1.8014731407165527, "rewards_train/2-w": 1.5002950429916382, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9489448070526123, "rewards_train/margins_1": 2.232776165008545, "rewards_train/margins_2": 0.30117809772491455, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -99.05859375, "logps_train/policy_1_l": -136.58335876464844, "logps_train/policy_1_w": -92.0826416015625, "logps_train/policy_2_2": -76.4144515991211, "logps_train/policy_2_w": -121.17561340332031, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.6222661733627319, "rewards_train/1-l": -2.327476739883423, "rewards_train/1-w": 1.2542364597320557, "rewards_train/2-2": 1.075351595878601, "rewards_train/2-w": 0.3730638027191162, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5817131996154785, "rewards_train/margins_1": 0.6319702863693237, "rewards_train/margins_2": 0.7022877931594849, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -99.8662109375, "logps_train/policy_1_l": -112.18978881835938, "logps_train/policy_1_w": -103.02519226074219, "logps_train/policy_2_2": -77.82505798339844, "logps_train/policy_2_w": -136.35092163085938, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.9618158340454102, "rewards_train/1-l": -1.195932149887085, "rewards_train/1-w": 1.9865429401397705, "rewards_train/2-2": 1.5167126655578613, "rewards_train/2-w": 1.239908218383789, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1824750900268555, "rewards_train/margins_1": 1.0247271060943604, "rewards_train/margins_2": 0.27680444717407227, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -127.49725341796875, "logps_train/policy_1_l": -163.81692504882812, "logps_train/policy_1_w": -108.47030639648438, "logps_train/policy_2_2": -98.10877227783203, "logps_train/policy_2_w": -142.0205841064453, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.4174625873565674, "rewards_train/1-l": -1.9973176717758179, "rewards_train/1-w": 2.599062442779541, "rewards_train/2-2": 2.167248249053955, "rewards_train/2-w": 1.7065355777740479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.596380114555359, "rewards_train/margins_1": 1.1815998554229736, "rewards_train/margins_2": 0.4607126712799072, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -233.17086791992188, "logps_train/policy_1_l": -234.83106994628906, "logps_train/policy_1_w": -109.49211120605469, "logps_train/policy_2_2": -207.138916015625, "logps_train/policy_2_w": -135.01956176757812, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.065726399421692, "rewards_train/1-l": -2.8870134353637695, "rewards_train/1-w": 2.592195987701416, "rewards_train/2-2": 1.986890196800232, "rewards_train/2-w": 1.8705049753189087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.4792094230651855, "rewards_train/margins_1": 1.5264695882797241, "rewards_train/margins_2": 0.11638522148132324, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -192.7320556640625, "logps_train/policy_1_l": -196.09536743164062, "logps_train/policy_1_w": -161.2058563232422, "logps_train/policy_2_2": -155.9175567626953, "logps_train/policy_2_w": -199.555419921875, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 1.454919457435608, "rewards_train/1-l": -1.9107091426849365, "rewards_train/1-w": 3.4387896060943604, "rewards_train/2-2": 2.6379318237304688, "rewards_train/2-w": 2.3725829124450684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.349498748779297, "rewards_train/margins_1": 1.9838701486587524, "rewards_train/margins_2": 0.2653489112854004, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -249.3960418701172, "logps_train/policy_1_l": -172.46047973632812, "logps_train/policy_1_w": -137.84130859375, "logps_train/policy_2_2": -185.96636962890625, "logps_train/policy_2_w": -183.59100341796875, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 0.6244585514068604, "rewards_train/1-l": -1.7300317287445068, "rewards_train/1-w": 2.7096195220947266, "rewards_train/2-2": 2.7268009185791016, "rewards_train/2-w": 1.390899419784546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.439651250839233, "rewards_train/margins_1": 2.085160970687866, "rewards_train/margins_2": 1.3359014987945557, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -105.0633544921875, "logps_train/policy_1_l": -107.46784973144531, "logps_train/policy_1_w": -83.70010375976562, "logps_train/policy_2_2": -78.22901916503906, "logps_train/policy_2_w": -107.79994201660156, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.0151488780975342, "rewards_train/1-l": -1.301668643951416, "rewards_train/1-w": 1.7215913534164429, "rewards_train/2-2": 1.5110828876495361, "rewards_train/2-w": 1.0090690851211548, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.023259997367859, "rewards_train/margins_1": 0.7064424753189087, "rewards_train/margins_2": 0.5020138025283813, "step": 267 }, { "epoch": 0.8, "learning_rate": 3.5078085732199314e-06, "loss": 0.7068, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -150.90293884277344, "logps_train/policy_1_l": -122.3808364868164, "logps_train/policy_1_w": -95.60652160644531, "logps_train/policy_2_2": -108.89544677734375, "logps_train/policy_2_w": -134.12283325195312, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.7175180912017822, "rewards_train/1-l": -1.0471071004867554, "rewards_train/1-w": 2.7877848148345947, "rewards_train/2-2": 2.7768616676330566, "rewards_train/2-w": 1.7228730916976929, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.83489191532135, "rewards_train/margins_1": 1.0702667236328125, "rewards_train/margins_2": 1.0539885759353638, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -180.96368408203125, "logps_train/policy_1_l": -152.13589477539062, "logps_train/policy_1_w": -185.61126708984375, "logps_train/policy_2_2": -150.8564453125, "logps_train/policy_2_w": -210.2846221923828, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.2598817348480225, "rewards_train/1-l": -1.5190573930740356, "rewards_train/1-w": 2.566997528076172, "rewards_train/2-2": 3.0206046104431152, "rewards_train/2-w": 1.8590389490127563, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.0860549211502075, "rewards_train/margins_1": 0.3071157932281494, "rewards_train/margins_2": 1.1615656614303589, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -80.66011047363281, "logps_train/policy_1_l": -62.32223892211914, "logps_train/policy_1_w": -91.69322204589844, "logps_train/policy_2_2": -64.68415069580078, "logps_train/policy_2_w": -113.3023910522461, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.6351606845855713, "rewards_train/1-l": -0.37245845794677734, "rewards_train/1-w": 0.9338021278381348, "rewards_train/2-2": 0.9835386276245117, "rewards_train/2-w": 0.06663544476032257, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.306260585784912, "rewards_train/margins_1": 0.2986414432525635, "rewards_train/margins_2": 0.9169031828641891, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -87.06289672851562, "logps_train/policy_1_l": -280.16162109375, "logps_train/policy_1_w": -80.83699035644531, "logps_train/policy_2_2": -72.45023345947266, "logps_train/policy_2_w": -102.56583404541016, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -245.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": 1.2968356609344482, "rewards_train/1-l": -3.5552234649658203, "rewards_train/1-w": 2.2288012504577637, "rewards_train/2-2": 1.6276332139968872, "rewards_train/2-w": 1.680916666984558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.784024715423584, "rewards_train/margins_1": 0.9319655895233154, "rewards_train/margins_2": -0.0532834529876709, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -252.51905822753906, "logps_train/policy_1_l": -198.09231567382812, "logps_train/policy_1_w": -216.37486267089844, "logps_train/policy_2_2": -223.72750854492188, "logps_train/policy_2_w": -252.8382110595703, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.8305150270462036, "rewards_train/1-l": -1.7074729204177856, "rewards_train/1-w": 3.6024556159973145, "rewards_train/2-2": 2.7053756713867188, "rewards_train/2-w": 2.2249679565429688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.3099285364151, "rewards_train/margins_1": 1.7719405889511108, "rewards_train/margins_2": 0.48040771484375, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -249.19967651367188, "logps_train/policy_1_l": -194.85975646972656, "logps_train/policy_1_w": -120.66292572021484, "logps_train/policy_2_2": -209.41397094726562, "logps_train/policy_2_w": -153.98968505859375, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 0.7769085168838501, "rewards_train/1-l": -2.660975456237793, "rewards_train/1-w": 1.7555824518203735, "rewards_train/2-2": 2.247664451599121, "rewards_train/2-w": 1.2760310173034668, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4165579080581665, "rewards_train/margins_1": 0.9786739349365234, "rewards_train/margins_2": 0.9716334342956543, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -186.12869262695312, "logps_train/policy_1_l": -169.49087524414062, "logps_train/policy_1_w": -142.35565185546875, "logps_train/policy_2_2": -154.80758666992188, "logps_train/policy_2_w": -170.70388793945312, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.769943118095398, "rewards_train/1-l": -1.6674474477767944, "rewards_train/1-w": 2.7097463607788086, "rewards_train/2-2": 2.861428737640381, "rewards_train/2-w": 1.810861349105835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.377193808555603, "rewards_train/margins_1": 0.9398032426834106, "rewards_train/margins_2": 1.050567388534546, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -172.33470153808594, "logps_train/policy_1_l": -161.96417236328125, "logps_train/policy_1_w": -121.7230453491211, "logps_train/policy_2_2": -141.81964111328125, "logps_train/policy_2_w": -149.4145965576172, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.6243430376052856, "rewards_train/1-l": -1.16829252243042, "rewards_train/1-w": 2.9073824882507324, "rewards_train/2-2": 2.254657506942749, "rewards_train/2-w": 2.1139121055603027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.075675010681152, "rewards_train/margins_1": 1.2830394506454468, "rewards_train/margins_2": 0.1407454013824463, "step": 268 }, { "epoch": 0.81, "logps_train/policy_1_2": -127.9491195678711, "logps_train/policy_1_l": -182.02337646484375, "logps_train/policy_1_w": -161.80812072753906, "logps_train/policy_2_2": -92.12765502929688, "logps_train/policy_2_w": -220.7486572265625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.585557222366333, "rewards_train/1-l": -1.2460873126983643, "rewards_train/1-w": 2.6082510948181152, "rewards_train/2-2": 2.3684840202331543, "rewards_train/2-w": 0.5188837051391602, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8543384075164795, "rewards_train/margins_1": 1.0226938724517822, "rewards_train/margins_2": 1.8496003150939941, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -145.739990234375, "logps_train/policy_1_l": -84.98013305664062, "logps_train/policy_1_w": -47.26974868774414, "logps_train/policy_2_2": -107.57747650146484, "logps_train/policy_2_w": -77.51685333251953, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.8158442974090576, "rewards_train/1-l": -0.9398098587989807, "rewards_train/1-w": 1.8589625358581543, "rewards_train/2-2": 1.9719395637512207, "rewards_train/2-w": 1.1483147144317627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.798772394657135, "rewards_train/margins_1": 1.0431182384490967, "rewards_train/margins_2": 0.823624849319458, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -243.54217529296875, "logps_train/policy_1_l": -271.0133056640625, "logps_train/policy_1_w": -151.75674438476562, "logps_train/policy_2_2": -190.21092224121094, "logps_train/policy_2_w": -187.21856689453125, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.6535965204238892, "rewards_train/1-l": -3.523205041885376, "rewards_train/1-w": 2.8829185962677, "rewards_train/2-2": 2.4070324897766113, "rewards_train/2-w": 1.9843932390213013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.406123638153076, "rewards_train/margins_1": 2.229322075843811, "rewards_train/margins_2": 0.42263925075531006, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -176.71168518066406, "logps_train/policy_1_l": -150.38270568847656, "logps_train/policy_1_w": -135.6920623779297, "logps_train/policy_2_2": -148.91673278808594, "logps_train/policy_2_w": -173.34494018554688, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.9444570541381836, "rewards_train/1-l": -1.197645902633667, "rewards_train/1-w": 2.4854822158813477, "rewards_train/2-2": 2.8864517211914062, "rewards_train/2-w": 1.2405067682266235, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6831281185150146, "rewards_train/margins_1": 0.5410251617431641, "rewards_train/margins_2": 1.6459449529647827, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -236.07342529296875, "logps_train/policy_1_l": -112.06914520263672, "logps_train/policy_1_w": -150.2099609375, "logps_train/policy_2_2": -184.07899475097656, "logps_train/policy_2_w": -179.8671875, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.8207820057868958, "rewards_train/1-l": -0.6893360614776611, "rewards_train/1-w": 2.187598466873169, "rewards_train/2-2": 3.296788215637207, "rewards_train/2-w": 1.3898439407348633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.87693452835083, "rewards_train/margins_1": 1.3668164610862732, "rewards_train/margins_2": 1.9069442749023438, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -179.07015991210938, "logps_train/policy_1_l": -254.31126403808594, "logps_train/policy_1_w": -182.27294921875, "logps_train/policy_2_2": -150.76150512695312, "logps_train/policy_2_w": -217.99195861816406, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 2.239858627319336, "rewards_train/1-l": -2.2170653343200684, "rewards_train/1-w": 3.8539562225341797, "rewards_train/2-2": 2.9207239151000977, "rewards_train/2-w": 2.419555187225342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.071021556854248, "rewards_train/margins_1": 1.6140975952148438, "rewards_train/margins_2": 0.5011687278747559, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -163.11334228515625, "logps_train/policy_1_l": -113.59893798828125, "logps_train/policy_1_w": -165.03427124023438, "logps_train/policy_2_2": -123.36495971679688, "logps_train/policy_2_w": -210.4778289794922, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.3699169158935547, "rewards_train/1-l": -0.9856746196746826, "rewards_train/1-w": 2.313760757446289, "rewards_train/2-2": 2.3228793144226074, "rewards_train/2-w": 0.5928425788879395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2994353771209717, "rewards_train/margins_1": 0.9438438415527344, "rewards_train/margins_2": 1.730036735534668, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -169.91278076171875, "logps_train/policy_1_l": -197.31735229492188, "logps_train/policy_1_w": -165.93905639648438, "logps_train/policy_2_2": -144.333740234375, "logps_train/policy_2_w": -200.7423553466797, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.752862811088562, "rewards_train/1-l": -1.6030240058898926, "rewards_train/1-w": 3.0732810497283936, "rewards_train/2-2": 2.447875499725342, "rewards_train/2-w": 1.9820148944854736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.676305055618286, "rewards_train/margins_1": 1.3204182386398315, "rewards_train/margins_2": 0.46586060523986816, "step": 269 }, { "epoch": 0.81, "learning_rate": 3.4851574435067925e-06, "loss": 0.618, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -224.93902587890625, "logps_train/policy_1_l": -190.04896545410156, "logps_train/policy_1_w": -102.04864501953125, "logps_train/policy_2_2": -183.52951049804688, "logps_train/policy_2_w": -124.56383514404297, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.8996524810791016, "rewards_train/1-l": -2.4020650386810303, "rewards_train/1-w": 2.27872896194458, "rewards_train/2-2": 2.8986105918884277, "rewards_train/2-w": 1.9975229501724243, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.68079400062561, "rewards_train/margins_1": 0.3790764808654785, "rewards_train/margins_2": 0.9010876417160034, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -116.67802429199219, "logps_train/policy_1_l": -124.9281997680664, "logps_train/policy_1_w": -70.38333892822266, "logps_train/policy_2_2": -95.75106811523438, "logps_train/policy_2_w": -91.33888244628906, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.124384880065918, "rewards_train/1-l": -1.356442928314209, "rewards_train/1-w": 1.4253379106521606, "rewards_train/2-2": 1.4483306407928467, "rewards_train/2-w": 0.6629862785339355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7817808389663696, "rewards_train/margins_1": 0.3009530305862427, "rewards_train/margins_2": 0.7853443622589111, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -176.15757751464844, "logps_train/policy_1_l": -220.27264404296875, "logps_train/policy_1_w": -196.52549743652344, "logps_train/policy_2_2": -149.44268798828125, "logps_train/policy_2_w": -232.23788452148438, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 1.912367582321167, "rewards_train/1-l": -2.2839059829711914, "rewards_train/1-w": 2.3497931957244873, "rewards_train/2-2": 2.700263023376465, "rewards_train/2-w": 1.2574604749679565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.633699178695679, "rewards_train/margins_1": 0.4374256134033203, "rewards_train/margins_2": 1.4428025484085083, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -206.02243041992188, "logps_train/policy_1_l": -135.13665771484375, "logps_train/policy_1_w": -81.74951934814453, "logps_train/policy_2_2": -169.658447265625, "logps_train/policy_2_w": -113.75408935546875, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.6633831262588501, "rewards_train/1-l": -1.2644455432891846, "rewards_train/1-w": 1.8734855651855469, "rewards_train/2-2": 1.6271235942840576, "rewards_train/2-w": 1.040998101234436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1379311084747314, "rewards_train/margins_1": 1.2101024389266968, "rewards_train/margins_2": 0.5861254930496216, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -203.96456909179688, "logps_train/policy_1_l": -191.0165557861328, "logps_train/policy_1_w": -134.73519897460938, "logps_train/policy_2_2": -153.43324279785156, "logps_train/policy_2_w": -182.15786743164062, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.4769818782806396, "rewards_train/1-l": -1.1071244478225708, "rewards_train/1-w": 3.0171055793762207, "rewards_train/2-2": 2.938706398010254, "rewards_train/2-w": 1.5935888290405273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1242300271987915, "rewards_train/margins_1": 1.540123701095581, "rewards_train/margins_2": 1.3451175689697266, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -168.69781494140625, "logps_train/policy_1_l": -200.59619140625, "logps_train/policy_1_w": -146.619140625, "logps_train/policy_2_2": -123.92276000976562, "logps_train/policy_2_w": -192.4540557861328, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.7552188634872437, "rewards_train/1-l": -2.126805305480957, "rewards_train/1-w": 2.453319549560547, "rewards_train/2-2": 2.738974094390869, "rewards_train/2-w": 0.8600625991821289, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.580124855041504, "rewards_train/margins_1": 0.6981006860733032, "rewards_train/margins_2": 1.8789114952087402, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -173.16812133789062, "logps_train/policy_1_l": -153.69683837890625, "logps_train/policy_1_w": -135.88507080078125, "logps_train/policy_2_2": -138.3662109375, "logps_train/policy_2_w": -174.1510772705078, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.9144375324249268, "rewards_train/1-l": -1.1310110092163086, "rewards_train/1-w": 2.8696963787078857, "rewards_train/2-2": 3.0071287155151367, "rewards_train/2-w": 1.6802045106887817, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.000707387924194, "rewards_train/margins_1": 0.955258846282959, "rewards_train/margins_2": 1.326924204826355, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -93.59931182861328, "logps_train/policy_1_l": -90.49893951416016, "logps_train/policy_1_w": -64.86505126953125, "logps_train/policy_2_2": -77.23064422607422, "logps_train/policy_2_w": -92.30589294433594, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 1.0416314601898193, "rewards_train/1-l": -0.8038004636764526, "rewards_train/1-w": 1.0980656147003174, "rewards_train/2-2": 1.4034974575042725, "rewards_train/2-w": 0.41628581285476685, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.90186607837677, "rewards_train/margins_1": 0.05643415451049805, "rewards_train/margins_2": 0.9872116446495056, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -214.42630004882812, "logps_train/policy_1_l": -156.12071228027344, "logps_train/policy_1_w": -115.44329833984375, "logps_train/policy_2_2": -171.31832885742188, "logps_train/policy_2_w": -164.63699340820312, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.25736927986145, "rewards_train/1-l": -1.381016492843628, "rewards_train/1-w": 2.2119204998016357, "rewards_train/2-2": 3.6775429248809814, "rewards_train/2-w": 1.0112996101379395, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5929369926452637, "rewards_train/margins_1": -0.04544878005981445, "rewards_train/margins_2": 2.666243314743042, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -205.8086700439453, "logps_train/policy_1_l": -183.52662658691406, "logps_train/policy_1_w": -186.40696716308594, "logps_train/policy_2_2": -167.4439697265625, "logps_train/policy_2_w": -219.67013549804688, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.3441327810287476, "rewards_train/1-l": -1.6089118719100952, "rewards_train/1-w": 3.1218032836914062, "rewards_train/2-2": 2.661853313446045, "rewards_train/2-w": 1.9267356395721436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.7307151556015015, "rewards_train/margins_1": 1.7776705026626587, "rewards_train/margins_2": 0.7351176738739014, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -49.34527587890625, "logps_train/policy_1_l": -39.18580627441406, "logps_train/policy_1_w": -41.924072265625, "logps_train/policy_2_2": -42.779422760009766, "logps_train/policy_2_w": -53.31395721435547, "logps_train/ref_1_2": -53.0, "logps_train/ref_1_l": -34.25, "logps_train/ref_1_w": -54.75, "logps_train/ref_2_2": -48.5, "logps_train/ref_2_w": -62.0, "rewards_train/1-2": 0.3584415316581726, "rewards_train/1-l": -0.4916275441646576, "rewards_train/1-w": 1.286596655845642, "rewards_train/2-2": 0.5765500068664551, "rewards_train/2-w": 0.8857917785644531, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 1.7782242000102997, "rewards_train/margins_1": 0.9281551241874695, "rewards_train/margins_2": -0.30924177169799805, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -115.35455322265625, "logps_train/policy_1_l": -56.684627532958984, "logps_train/policy_1_w": -43.131954193115234, "logps_train/policy_2_2": -91.40031433105469, "logps_train/policy_2_w": -59.59031677246094, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -46.0, "logps_train/ref_1_w": -57.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.5754831433296204, "rewards_train/1-l": -1.0739314556121826, "rewards_train/1-w": 1.4305546283721924, "rewards_train/2-2": 1.742781400680542, "rewards_train/2-w": 1.0651874542236328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.504486083984375, "rewards_train/margins_1": 0.855071485042572, "rewards_train/margins_2": 0.6775939464569092, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -163.32769775390625, "logps_train/policy_1_l": -204.0165557861328, "logps_train/policy_1_w": -184.15780639648438, "logps_train/policy_2_2": -128.18597412109375, "logps_train/policy_2_w": -236.73098754882812, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 1.9359791278839111, "rewards_train/1-l": -1.5273213386535645, "rewards_train/1-w": 3.845155954360962, "rewards_train/2-2": 2.8149964809417725, "rewards_train/2-w": 2.5081520080566406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.372477293014526, "rewards_train/margins_1": 1.9091768264770508, "rewards_train/margins_2": 0.30684447288513184, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -202.21554565429688, "logps_train/policy_1_l": -168.94110107421875, "logps_train/policy_1_w": -141.08450317382812, "logps_train/policy_2_2": -160.2187042236328, "logps_train/policy_2_w": -184.56216430664062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.1516883373260498, "rewards_train/1-l": -1.1034858226776123, "rewards_train/1-w": 2.683346748352051, "rewards_train/2-2": 2.620903968811035, "rewards_train/2-w": 1.3566746711730957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.786832571029663, "rewards_train/margins_1": 1.531658411026001, "rewards_train/margins_2": 1.2642292976379395, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -233.52499389648438, "logps_train/policy_1_l": -181.05938720703125, "logps_train/policy_1_w": -152.97439575195312, "logps_train/policy_2_2": -175.83047485351562, "logps_train/policy_2_w": -196.99566650390625, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.986564576625824, "rewards_train/1-l": -1.637188196182251, "rewards_train/1-w": 3.190061330795288, "rewards_train/2-2": 3.3466405868530273, "rewards_train/2-w": 2.025432586669922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.827249526977539, "rewards_train/margins_1": 2.203496754169464, "rewards_train/margins_2": 1.3212080001831055, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -100.66094207763672, "logps_train/policy_1_l": -69.88218688964844, "logps_train/policy_1_w": -107.78731536865234, "logps_train/policy_2_2": -72.52320861816406, "logps_train/policy_2_w": -132.6521453857422, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.4378118515014648, "rewards_train/1-l": -0.6257182955741882, "rewards_train/1-w": 1.6079872846603394, "rewards_train/2-2": 2.1703360080718994, "rewards_train/2-w": 1.0918173789978027, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2337055802345276, "rewards_train/margins_1": 0.1701754331588745, "rewards_train/margins_2": 1.0785186290740967, "step": 271 }, { "epoch": 0.81, "learning_rate": 3.462410164163893e-06, "loss": 0.7124, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -166.7053680419922, "logps_train/policy_1_l": -167.9475555419922, "logps_train/policy_1_w": -118.24888610839844, "logps_train/policy_2_2": -134.57858276367188, "logps_train/policy_2_w": -155.52064514160156, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.176337718963623, "rewards_train/1-l": -1.3381140232086182, "rewards_train/1-w": 1.9403457641601562, "rewards_train/2-2": 2.1905789375305176, "rewards_train/2-w": 0.7877793312072754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2784597873687744, "rewards_train/margins_1": 0.7640080451965332, "rewards_train/margins_2": 1.4027996063232422, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -160.32171630859375, "logps_train/policy_1_l": -162.03564453125, "logps_train/policy_1_w": -122.15396118164062, "logps_train/policy_2_2": -121.68097686767578, "logps_train/policy_2_w": -154.1876220703125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.627204179763794, "rewards_train/1-l": -1.2191896438598633, "rewards_train/1-w": 2.7267918586730957, "rewards_train/2-2": 2.956902503967285, "rewards_train/2-w": 1.5968635082244873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.945981502532959, "rewards_train/margins_1": 1.0995876789093018, "rewards_train/margins_2": 1.3600389957427979, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -175.95513916015625, "logps_train/policy_1_l": -143.09661865234375, "logps_train/policy_1_w": -114.94325256347656, "logps_train/policy_2_2": -139.59332275390625, "logps_train/policy_2_w": -147.69656372070312, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.1234309673309326, "rewards_train/1-l": -1.5776314735412598, "rewards_train/1-w": 2.717393636703491, "rewards_train/2-2": 2.480120897293091, "rewards_train/2-w": 1.5322966575622559, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.295025110244751, "rewards_train/margins_1": 1.5939626693725586, "rewards_train/margins_2": 0.947824239730835, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -140.59173583984375, "logps_train/policy_1_l": -135.7632293701172, "logps_train/policy_1_w": -79.88385009765625, "logps_train/policy_2_2": -100.81228637695312, "logps_train/policy_2_w": -102.868896484375, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.9455143809318542, "rewards_train/1-l": -1.4173388481140137, "rewards_train/1-w": 1.80302095413208, "rewards_train/2-2": 1.8472870588302612, "rewards_train/2-w": 1.6474854946136475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2203598022460938, "rewards_train/margins_1": 0.8575065732002258, "rewards_train/margins_2": 0.19980156421661377, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -199.45263671875, "logps_train/policy_1_l": -193.82135009765625, "logps_train/policy_1_w": -128.5242919921875, "logps_train/policy_2_2": -167.06866455078125, "logps_train/policy_2_w": -160.64707946777344, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.5797359943389893, "rewards_train/1-l": -1.7293524742126465, "rewards_train/1-w": 2.0913195610046387, "rewards_train/2-2": 3.619696855545044, "rewards_train/2-w": 1.161854863166809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.820672035217285, "rewards_train/margins_1": -0.4884164333343506, "rewards_train/margins_2": 2.457841992378235, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -112.47602844238281, "logps_train/policy_1_l": -99.21480560302734, "logps_train/policy_1_w": -55.38972091674805, "logps_train/policy_2_2": -79.84922790527344, "logps_train/policy_2_w": -76.06463623046875, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 1.568021535873413, "rewards_train/1-l": -2.2523398399353027, "rewards_train/1-w": 1.1483325958251953, "rewards_train/2-2": 2.318202018737793, "rewards_train/2-w": 0.6708796620368958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.400672435760498, "rewards_train/margins_1": -0.4196889400482178, "rewards_train/margins_2": 1.6473223567008972, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -106.47422790527344, "logps_train/policy_1_l": -119.69427490234375, "logps_train/policy_1_w": -103.05268859863281, "logps_train/policy_2_2": -83.37355041503906, "logps_train/policy_2_w": -130.29296875, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.2947649955749512, "rewards_train/1-l": -1.138176679611206, "rewards_train/1-w": 1.915825366973877, "rewards_train/2-2": 1.7181135416030884, "rewards_train/2-w": 0.980079174041748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.054002046585083, "rewards_train/margins_1": 0.6210603713989258, "rewards_train/margins_2": 0.7380343675613403, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -173.54786682128906, "logps_train/policy_1_l": -162.0414581298828, "logps_train/policy_1_w": -97.88026428222656, "logps_train/policy_2_2": -137.42327880859375, "logps_train/policy_2_w": -132.93679809570312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.7858386039733887, "rewards_train/1-l": -2.3408641815185547, "rewards_train/1-w": 2.154161214828491, "rewards_train/2-2": 2.0850160121917725, "rewards_train/2-w": 1.2188199758529663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.495025396347046, "rewards_train/margins_1": 1.3683226108551025, "rewards_train/margins_2": 0.8661960363388062, "step": 272 }, { "epoch": 0.82, "logps_train/policy_1_2": -76.73651123046875, "logps_train/policy_1_l": -71.69326782226562, "logps_train/policy_1_w": -70.08585357666016, "logps_train/policy_2_2": -66.71961975097656, "logps_train/policy_2_w": -93.35286712646484, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.5408022403717041, "rewards_train/1-l": -0.6097568869590759, "rewards_train/1-w": 2.2285242080688477, "rewards_train/2-2": 1.020420789718628, "rewards_train/2-w": 1.5572912693023682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.8382810950279236, "rewards_train/margins_1": 1.6877219676971436, "rewards_train/margins_2": -0.5368704795837402, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -54.17620086669922, "logps_train/policy_1_l": -148.41714477539062, "logps_train/policy_1_w": -101.138427734375, "logps_train/policy_2_2": -46.46784973144531, "logps_train/policy_2_w": -128.3458251953125, "logps_train/ref_1_2": -67.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -59.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.253424882888794, "rewards_train/1-l": -2.1680212020874023, "rewards_train/1-w": 2.509826183319092, "rewards_train/2-2": 1.2753829956054688, "rewards_train/2-w": 1.2302606105804443, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.677847385406494, "rewards_train/margins_1": 1.2564013004302979, "rewards_train/margins_2": 0.045122385025024414, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -115.75796508789062, "logps_train/policy_1_l": -52.118919372558594, "logps_train/policy_1_w": -53.29641342163086, "logps_train/policy_2_2": -78.8911361694336, "logps_train/policy_2_w": -85.06288146972656, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -45.0, "logps_train/ref_1_w": -65.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.6570154428482056, "rewards_train/1-l": -0.7118920087814331, "rewards_train/1-w": 1.2152807712554932, "rewards_train/2-2": 1.5561988353729248, "rewards_train/2-w": 0.3507440388202667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9271727800369263, "rewards_train/margins_1": 0.5582653284072876, "rewards_train/margins_2": 1.205454796552658, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -155.17251586914062, "logps_train/policy_1_l": -215.8433837890625, "logps_train/policy_1_w": -144.84530639648438, "logps_train/policy_2_2": -118.68901824951172, "logps_train/policy_2_w": -188.94989013671875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.2671241760253906, "rewards_train/1-l": -2.4218382835388184, "rewards_train/1-w": 3.643594741821289, "rewards_train/2-2": 2.249457359313965, "rewards_train/2-w": 2.2862606048583984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.065433025360107, "rewards_train/margins_1": 2.3764705657958984, "rewards_train/margins_2": -0.036803245544433594, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -121.12400817871094, "logps_train/policy_1_l": -104.94799041748047, "logps_train/policy_1_w": -81.77094268798828, "logps_train/policy_2_2": -106.44660186767578, "logps_train/policy_2_w": -96.41346740722656, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.3282240629196167, "rewards_train/1-l": -0.9186270236968994, "rewards_train/1-w": 2.1080617904663086, "rewards_train/2-2": 1.753777027130127, "rewards_train/2-w": 1.7320908308029175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.026688814163208, "rewards_train/margins_1": 0.7798377275466919, "rewards_train/margins_2": 0.021686196327209473, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -191.054443359375, "logps_train/policy_1_l": -142.1103973388672, "logps_train/policy_1_w": -121.99012756347656, "logps_train/policy_2_2": -167.36251831054688, "logps_train/policy_2_w": -154.5915069580078, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.8742423057556152, "rewards_train/1-l": -1.6098674535751343, "rewards_train/1-w": 2.373642683029175, "rewards_train/2-2": 2.6543729305267334, "rewards_train/2-w": 1.2924118041992188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.983510136604309, "rewards_train/margins_1": 0.49940037727355957, "rewards_train/margins_2": 1.3619611263275146, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -215.37686157226562, "logps_train/policy_1_l": -179.7245635986328, "logps_train/policy_1_w": -152.34893798828125, "logps_train/policy_2_2": -186.6990966796875, "logps_train/policy_2_w": -200.91079711914062, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.039853096008301, "rewards_train/1-l": -2.1234323978424072, "rewards_train/1-w": 3.08073091506958, "rewards_train/2-2": 3.108215808868408, "rewards_train/2-w": 1.630794882774353, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.204163312911987, "rewards_train/margins_1": 1.0408778190612793, "rewards_train/margins_2": 1.4774209260940552, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -150.373291015625, "logps_train/policy_1_l": -182.7509307861328, "logps_train/policy_1_w": -117.62069702148438, "logps_train/policy_2_2": -128.75270080566406, "logps_train/policy_2_w": -139.35096740722656, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.7329833507537842, "rewards_train/1-l": -0.9813436269760132, "rewards_train/1-w": 1.905899167060852, "rewards_train/2-2": 2.1731674671173096, "rewards_train/2-w": 1.4383409023284912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8872427940368652, "rewards_train/margins_1": 0.17291581630706787, "rewards_train/margins_2": 0.7348265647888184, "step": 273 }, { "epoch": 0.82, "learning_rate": 3.4395689552855956e-06, "loss": 0.7188, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -167.99673461914062, "logps_train/policy_1_l": -134.9370880126953, "logps_train/policy_1_w": -148.45433044433594, "logps_train/policy_2_2": -132.78614807128906, "logps_train/policy_2_w": -172.92172241210938, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.4440761804580688, "rewards_train/1-l": -1.269686222076416, "rewards_train/1-w": 2.156519889831543, "rewards_train/2-2": 2.6151347160339355, "rewards_train/2-w": 1.549429178237915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.426206111907959, "rewards_train/margins_1": 0.7124437093734741, "rewards_train/margins_2": 1.0657055377960205, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -167.07965087890625, "logps_train/policy_1_l": -215.70831298828125, "logps_train/policy_1_w": -113.03304290771484, "logps_train/policy_2_2": -120.13997650146484, "logps_train/policy_2_w": -157.12069702148438, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.70766019821167, "rewards_train/1-l": -2.3743209838867188, "rewards_train/1-w": 2.7980871200561523, "rewards_train/2-2": 3.104752540588379, "rewards_train/2-w": 1.8656171560287476, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.172408103942871, "rewards_train/margins_1": 1.0904269218444824, "rewards_train/margins_2": 1.2391353845596313, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -46.634002685546875, "logps_train/policy_1_l": -11.577455520629883, "logps_train/policy_1_w": -48.46581268310547, "logps_train/policy_2_2": -34.640350341796875, "logps_train/policy_2_w": -65.47895050048828, "logps_train/ref_1_2": -50.0, "logps_train/ref_1_l": -9.25, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -41.5, "logps_train/ref_2_w": -71.0, "rewards_train/1-2": 0.31824055314064026, "rewards_train/1-l": -0.23445451259613037, "rewards_train/1-w": 1.0405282974243164, "rewards_train/2-2": 0.7070586681365967, "rewards_train/2-w": 0.5349175333976746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.2749828100204468, "rewards_train/margins_1": 0.7222877442836761, "rewards_train/margins_2": 0.17214113473892212, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -149.74676513671875, "logps_train/policy_1_l": -127.74860382080078, "logps_train/policy_1_w": -160.796142578125, "logps_train/policy_2_2": -119.51314544677734, "logps_train/policy_2_w": -201.57244873046875, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.3161444664001465, "rewards_train/1-l": -1.0859928131103516, "rewards_train/1-w": 1.8410882949829102, "rewards_train/2-2": 2.6180217266082764, "rewards_train/2-w": 0.4630679786205292, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9270811080932617, "rewards_train/margins_1": 0.5249438285827637, "rewards_train/margins_2": 2.154953747987747, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -162.49160766601562, "logps_train/policy_1_l": -139.0793914794922, "logps_train/policy_1_w": -128.22967529296875, "logps_train/policy_2_2": -126.44822692871094, "logps_train/policy_2_w": -170.343994140625, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.5877537727355957, "rewards_train/1-l": -1.0622365474700928, "rewards_train/1-w": 2.792658567428589, "rewards_train/2-2": 2.228224277496338, "rewards_train/2-w": 1.648803472518921, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8548951148986816, "rewards_train/margins_1": 1.2049047946929932, "rewards_train/margins_2": 0.579420804977417, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -182.42800903320312, "logps_train/policy_1_l": -160.69369506835938, "logps_train/policy_1_w": -124.26176452636719, "logps_train/policy_2_2": -144.45127868652344, "logps_train/policy_2_w": -163.07040405273438, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.8207740783691406, "rewards_train/1-l": -1.687729001045227, "rewards_train/1-w": 3.0544872283935547, "rewards_train/2-2": 2.8575081825256348, "rewards_train/2-w": 1.8195217847824097, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.742216229438782, "rewards_train/margins_1": 1.233713150024414, "rewards_train/margins_2": 1.037986397743225, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -134.08453369140625, "logps_train/policy_1_l": -109.46900939941406, "logps_train/policy_1_w": -117.51994323730469, "logps_train/policy_2_2": -112.9350357055664, "logps_train/policy_2_w": -147.35647583007812, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.0735783576965332, "rewards_train/1-l": -0.5258073806762695, "rewards_train/1-w": 2.003474712371826, "rewards_train/2-2": 1.7564966678619385, "rewards_train/2-w": 1.2010712623596191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5292820930480957, "rewards_train/margins_1": 0.929896354675293, "rewards_train/margins_2": 0.5554254055023193, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -195.49948120117188, "logps_train/policy_1_l": -200.8821258544922, "logps_train/policy_1_w": -159.59176635742188, "logps_train/policy_2_2": -158.47206115722656, "logps_train/policy_2_w": -195.2990264892578, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.273489236831665, "rewards_train/1-l": -1.4327445030212402, "rewards_train/1-w": 2.715825080871582, "rewards_train/2-2": 2.619199752807617, "rewards_train/2-w": 1.6122851371765137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.148569583892822, "rewards_train/margins_1": 1.442335844039917, "rewards_train/margins_2": 1.0069146156311035, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -230.88641357421875, "logps_train/policy_1_l": -163.98170471191406, "logps_train/policy_1_w": -135.4884490966797, "logps_train/policy_2_2": -185.10861206054688, "logps_train/policy_2_w": -176.73118591308594, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.7738592624664307, "rewards_train/1-l": -1.2651631832122803, "rewards_train/1-w": 3.0714681148529053, "rewards_train/2-2": 3.0555458068847656, "rewards_train/2-w": 2.0839130878448486, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.3366312980651855, "rewards_train/margins_1": 1.2976088523864746, "rewards_train/margins_2": 0.971632719039917, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -213.6943817138672, "logps_train/policy_1_l": -102.27515411376953, "logps_train/policy_1_w": -72.4949722290039, "logps_train/policy_2_2": -175.55465698242188, "logps_train/policy_2_w": -88.23529052734375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.069624662399292, "rewards_train/1-l": -1.1197030544281006, "rewards_train/1-w": 2.298940420150757, "rewards_train/2-2": 2.4937539100646973, "rewards_train/2-w": 1.892095923423767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4186434745788574, "rewards_train/margins_1": 1.2293157577514648, "rewards_train/margins_2": 0.6016579866409302, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -147.94570922851562, "logps_train/policy_1_l": -161.52120971679688, "logps_train/policy_1_w": -121.56140899658203, "logps_train/policy_2_2": -121.64402770996094, "logps_train/policy_2_w": -158.264892578125, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.1148042678833008, "rewards_train/1-l": -1.1099330186843872, "rewards_train/1-w": 2.5618276596069336, "rewards_train/2-2": 2.1309094429016113, "rewards_train/2-w": 1.3703863620758057, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.671760678291321, "rewards_train/margins_1": 1.4470233917236328, "rewards_train/margins_2": 0.7605230808258057, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -132.4281463623047, "logps_train/policy_1_l": -203.84645080566406, "logps_train/policy_1_w": -138.1696014404297, "logps_train/policy_2_2": -105.936279296875, "logps_train/policy_2_w": -172.58853149414062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.0470290184020996, "rewards_train/1-l": -2.490114212036133, "rewards_train/1-w": 2.151789665222168, "rewards_train/2-2": 2.036060094833374, "rewards_train/2-w": 1.104427456855774, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.641903877258301, "rewards_train/margins_1": 1.1047606468200684, "rewards_train/margins_2": 0.9316326379776001, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -182.52786254882812, "logps_train/policy_1_l": -90.0792236328125, "logps_train/policy_1_w": -110.32522583007812, "logps_train/policy_2_2": -154.8654022216797, "logps_train/policy_2_w": -133.6249237060547, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.897995114326477, "rewards_train/1-l": -0.9297975301742554, "rewards_train/1-w": 2.5627894401550293, "rewards_train/2-2": 2.7493972778320312, "rewards_train/2-w": 1.7062573432922363, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4925869703292847, "rewards_train/margins_1": 0.6647943258285522, "rewards_train/margins_2": 1.043139934539795, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -202.71609497070312, "logps_train/policy_1_l": -241.4642333984375, "logps_train/policy_1_w": -94.20985412597656, "logps_train/policy_2_2": -163.13198852539062, "logps_train/policy_2_w": -124.09016418457031, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.3112030029296875, "rewards_train/1-l": -3.1189815998077393, "rewards_train/1-w": 1.875889539718628, "rewards_train/2-2": 2.575862407684326, "rewards_train/2-w": 1.1753580570220947, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.994871139526367, "rewards_train/margins_1": 0.5646865367889404, "rewards_train/margins_2": 1.4005043506622314, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -83.87452697753906, "logps_train/policy_1_l": -118.878662109375, "logps_train/policy_1_w": -38.0733528137207, "logps_train/policy_2_2": -69.30036926269531, "logps_train/policy_2_w": -45.73043441772461, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -51.5, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -56.5, "rewards_train/1-2": 1.478172779083252, "rewards_train/1-l": -2.0663809776306152, "rewards_train/1-w": 1.3414928913116455, "rewards_train/2-2": 2.0559005737304688, "rewards_train/2-w": 1.0793001651763916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4078738689422607, "rewards_train/margins_1": -0.13667988777160645, "rewards_train/margins_2": 0.9766004085540771, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -158.63665771484375, "logps_train/policy_1_l": -171.48770141601562, "logps_train/policy_1_w": -166.6585235595703, "logps_train/policy_2_2": -131.68203735351562, "logps_train/policy_2_w": -203.36026000976562, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.9566457271575928, "rewards_train/1-l": -1.4378323554992676, "rewards_train/1-w": 2.0864906311035156, "rewards_train/2-2": 2.5528907775878906, "rewards_train/2-w": 0.629989743232727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.524322986602783, "rewards_train/margins_1": 0.12984490394592285, "rewards_train/margins_2": 1.9229010343551636, "step": 275 }, { "epoch": 0.83, "learning_rate": 3.416636046133621e-06, "loss": 0.7518, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -206.73623657226562, "logps_train/policy_1_l": -246.51495361328125, "logps_train/policy_1_w": -164.96743774414062, "logps_train/policy_2_2": -163.1494903564453, "logps_train/policy_2_w": -197.72857666015625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.6513770818710327, "rewards_train/1-l": -1.0327448844909668, "rewards_train/1-w": 2.595442771911621, "rewards_train/2-2": 2.8975510597229004, "rewards_train/2-w": 1.6865172386169434, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.628187656402588, "rewards_train/margins_1": 0.9440656900405884, "rewards_train/margins_2": 1.211033821105957, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -116.40480041503906, "logps_train/policy_1_l": -129.13206481933594, "logps_train/policy_1_w": -101.79930877685547, "logps_train/policy_2_2": -81.54965209960938, "logps_train/policy_2_w": -140.19253540039062, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.6487780213356018, "rewards_train/1-l": 0.3322760760784149, "rewards_train/1-w": 2.1472175121307373, "rewards_train/2-2": 2.2917139530181885, "rewards_train/2-w": 0.47742703557014465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8149414360523224, "rewards_train/margins_1": 1.4984394907951355, "rewards_train/margins_2": 1.8142869174480438, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -103.71153259277344, "logps_train/policy_1_l": -83.29451751708984, "logps_train/policy_1_w": -85.98368835449219, "logps_train/policy_2_2": -86.13922119140625, "logps_train/policy_2_w": -118.49089050292969, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.9796276092529297, "rewards_train/1-l": -0.886482834815979, "rewards_train/1-w": 1.4633495807647705, "rewards_train/2-2": 1.3071715831756592, "rewards_train/2-w": 0.6839196681976318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.3498324155807495, "rewards_train/margins_1": 0.4837219715118408, "rewards_train/margins_2": 0.6232519149780273, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -132.24005126953125, "logps_train/policy_1_l": -159.2518310546875, "logps_train/policy_1_w": -140.14431762695312, "logps_train/policy_2_2": -103.95718383789062, "logps_train/policy_2_w": -165.10592651367188, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.9650577902793884, "rewards_train/1-l": -1.0408098697662354, "rewards_train/1-w": 2.6488490104675293, "rewards_train/2-2": 1.943344235420227, "rewards_train/2-w": 1.6870627403259277, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6896588802337646, "rewards_train/margins_1": 1.6837912201881409, "rewards_train/margins_2": 0.2562814950942993, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -118.09916687011719, "logps_train/policy_1_l": -120.21797180175781, "logps_train/policy_1_w": -77.62248229980469, "logps_train/policy_2_2": -86.70621490478516, "logps_train/policy_2_w": -107.70256042480469, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 1.463130235671997, "rewards_train/1-l": -1.0964062213897705, "rewards_train/1-w": 2.1940016746520996, "rewards_train/2-2": 2.4625816345214844, "rewards_train/2-w": 1.1078691482543945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.29040789604187, "rewards_train/margins_1": 0.7308714389801025, "rewards_train/margins_2": 1.3547124862670898, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -148.00286865234375, "logps_train/policy_1_l": -226.40396118164062, "logps_train/policy_1_w": -187.4329833984375, "logps_train/policy_2_2": -116.88616180419922, "logps_train/policy_2_w": -232.83843994140625, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.687603235244751, "rewards_train/1-l": -2.43316912651062, "rewards_train/1-w": 3.043419599533081, "rewards_train/2-2": 2.6453676223754883, "rewards_train/2-w": 1.5020928382873535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.476588726043701, "rewards_train/margins_1": 1.35581636428833, "rewards_train/margins_2": 1.1432747840881348, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -231.8907470703125, "logps_train/policy_1_l": -178.54661560058594, "logps_train/policy_1_w": -113.81817626953125, "logps_train/policy_2_2": -197.3895721435547, "logps_train/policy_2_w": -141.04061889648438, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.743739128112793, "rewards_train/1-l": -1.46247398853302, "rewards_train/1-w": 2.0490427017211914, "rewards_train/2-2": 2.8376059532165527, "rewards_train/2-w": 1.450626015663147, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5115166902542114, "rewards_train/margins_1": 0.30530357360839844, "rewards_train/margins_2": 1.3869799375534058, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -62.43409729003906, "logps_train/policy_1_l": -104.47998046875, "logps_train/policy_1_w": -121.35987091064453, "logps_train/policy_2_2": -48.85862731933594, "logps_train/policy_2_w": -154.43218994140625, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -63.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.2765121459960938, "rewards_train/1-l": -0.7312016487121582, "rewards_train/1-w": 2.13979434967041, "rewards_train/2-2": 1.4391374588012695, "rewards_train/2-w": 1.115373969078064, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.8709959983825684, "rewards_train/margins_1": 0.8632822036743164, "rewards_train/margins_2": 0.32376348972320557, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -188.30880737304688, "logps_train/policy_1_l": -223.990966796875, "logps_train/policy_1_w": -122.52374267578125, "logps_train/policy_2_2": -154.3504180908203, "logps_train/policy_2_w": -151.60662841796875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.919119954109192, "rewards_train/1-l": -1.875657558441162, "rewards_train/1-w": 2.593719482421875, "rewards_train/2-2": 2.8837082386016846, "rewards_train/2-w": 1.9705864191055298, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.469377040863037, "rewards_train/margins_1": 0.6745995283126831, "rewards_train/margins_2": 0.9131218194961548, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -54.74960708618164, "logps_train/policy_1_l": -69.90059661865234, "logps_train/policy_1_w": -86.26019287109375, "logps_train/policy_2_2": -43.1490364074707, "logps_train/policy_2_w": -108.33783721923828, "logps_train/ref_1_2": -65.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -56.25, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": 1.005508303642273, "rewards_train/1-l": -0.6549034714698792, "rewards_train/1-w": 2.140777587890625, "rewards_train/2-2": 1.3046274185180664, "rewards_train/2-w": 1.1435596942901611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.795681059360504, "rewards_train/margins_1": 1.135269284248352, "rewards_train/margins_2": 0.16106772422790527, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -152.53176879882812, "logps_train/policy_1_l": -159.18692016601562, "logps_train/policy_1_w": -96.92327880859375, "logps_train/policy_2_2": -117.37257385253906, "logps_train/policy_2_w": -122.44096374511719, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.0311996936798096, "rewards_train/1-l": -1.765567660331726, "rewards_train/1-w": 2.0811095237731934, "rewards_train/2-2": 2.415868043899536, "rewards_train/2-w": 1.7137157917022705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8466771841049194, "rewards_train/margins_1": 1.0499098300933838, "rewards_train/margins_2": 0.7021522521972656, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -165.45729064941406, "logps_train/policy_1_l": -172.60098266601562, "logps_train/policy_1_w": -89.99622344970703, "logps_train/policy_2_2": -131.67843627929688, "logps_train/policy_2_w": -113.90451049804688, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.5648235082626343, "rewards_train/1-l": -2.7581381797790527, "rewards_train/1-w": 2.0874929428100586, "rewards_train/2-2": 2.848904848098755, "rewards_train/2-w": 1.4981714487075806, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.845631122589111, "rewards_train/margins_1": 0.5226694345474243, "rewards_train/margins_2": 1.3507333993911743, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -148.12942504882812, "logps_train/policy_1_l": -151.41958618164062, "logps_train/policy_1_w": -131.61912536621094, "logps_train/policy_2_2": -121.36547088623047, "logps_train/policy_2_w": -167.9705047607422, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 2.0401816368103027, "rewards_train/1-l": -1.3083642721176147, "rewards_train/1-w": 2.6279313564300537, "rewards_train/2-2": 2.6743907928466797, "rewards_train/2-w": 1.6740432977676392, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9362956285476685, "rewards_train/margins_1": 0.587749719619751, "rewards_train/margins_2": 1.0003474950790405, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -220.40853881835938, "logps_train/policy_1_l": -157.2684783935547, "logps_train/policy_1_w": -141.58297729492188, "logps_train/policy_2_2": -173.87672424316406, "logps_train/policy_2_w": -182.74937438964844, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.6982086896896362, "rewards_train/1-l": -1.6846598386764526, "rewards_train/1-w": 2.538577079772949, "rewards_train/2-2": 3.390843152999878, "rewards_train/2-w": 1.2109992504119873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.223236918449402, "rewards_train/margins_1": 0.840368390083313, "rewards_train/margins_2": 2.1798439025878906, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -91.29126739501953, "logps_train/policy_1_l": -50.51001739501953, "logps_train/policy_1_w": -74.18693542480469, "logps_train/policy_2_2": -68.21903991699219, "logps_train/policy_2_w": -92.73002624511719, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -44.75, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -89.5, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 1.7552484273910522, "rewards_train/1-l": -0.5737556219100952, "rewards_train/1-w": 1.2140220403671265, "rewards_train/2-2": 2.1218464374542236, "rewards_train/2-w": 1.069966197013855, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7877776622772217, "rewards_train/margins_1": -0.5412263870239258, "rewards_train/margins_2": 1.0518802404403687, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -193.3947296142578, "logps_train/policy_1_l": -84.33393859863281, "logps_train/policy_1_w": -137.38555908203125, "logps_train/policy_2_2": -152.62168884277344, "logps_train/policy_2_w": -175.34181213378906, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.1581826210021973, "rewards_train/1-l": -0.6895461082458496, "rewards_train/1-w": 2.2973814010620117, "rewards_train/2-2": 2.5786521434783936, "rewards_train/2-w": 1.142382025718689, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9869275093078613, "rewards_train/margins_1": 1.1391987800598145, "rewards_train/margins_2": 1.4362701177597046, "step": 277 }, { "epoch": 0.83, "learning_rate": 3.393613674919473e-06, "loss": 0.7821, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -182.89642333984375, "logps_train/policy_1_l": -155.94284057617188, "logps_train/policy_1_w": -149.18124389648438, "logps_train/policy_2_2": -147.9064483642578, "logps_train/policy_2_w": -190.77020263671875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.3353564739227295, "rewards_train/1-l": -1.551511287689209, "rewards_train/1-w": 2.1994526386260986, "rewards_train/2-2": 2.4499800205230713, "rewards_train/2-w": 0.9682918787002563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7509639263153076, "rewards_train/margins_1": 0.8640961647033691, "rewards_train/margins_2": 1.481688141822815, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -109.72549438476562, "logps_train/policy_1_l": -146.10842895507812, "logps_train/policy_1_w": -68.16293334960938, "logps_train/policy_2_2": -80.87173461914062, "logps_train/policy_2_w": -111.14141845703125, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.25948214530944824, "rewards_train/1-l": -2.4420924186706543, "rewards_train/1-w": 2.450894355773926, "rewards_train/2-2": 1.2104824781417847, "rewards_train/2-w": 1.5546088218688965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.89298677444458, "rewards_train/margins_1": 2.1914122104644775, "rewards_train/margins_2": -0.3441263437271118, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -163.59799194335938, "logps_train/policy_1_l": -191.11167907714844, "logps_train/policy_1_w": -167.33145141601562, "logps_train/policy_2_2": -129.51779174804688, "logps_train/policy_2_w": -205.24090576171875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.2409827709197998, "rewards_train/1-l": -1.683823585510254, "rewards_train/1-w": 3.148103713989258, "rewards_train/2-2": 2.278200387954712, "rewards_train/2-w": 1.4727838039398193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.831927299499512, "rewards_train/margins_1": 1.907120943069458, "rewards_train/margins_2": 0.8054165840148926, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -135.673095703125, "logps_train/policy_1_l": -96.36447143554688, "logps_train/policy_1_w": -108.99739074707031, "logps_train/policy_2_2": -102.49972534179688, "logps_train/policy_2_w": -150.78038024902344, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4405021667480469, "rewards_train/1-l": -0.7908423542976379, "rewards_train/1-w": 2.551823854446411, "rewards_train/2-2": 1.9726842641830444, "rewards_train/2-w": 0.8963766098022461, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.342666208744049, "rewards_train/margins_1": 1.1113216876983643, "rewards_train/margins_2": 1.0763076543807983, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -103.27323150634766, "logps_train/policy_1_l": -107.90172576904297, "logps_train/policy_1_w": -82.77373504638672, "logps_train/policy_2_2": -93.5596923828125, "logps_train/policy_2_w": -92.7691650390625, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 0.7386924028396606, "rewards_train/1-l": -1.406578779220581, "rewards_train/1-w": 1.5366888046264648, "rewards_train/2-2": 0.8405150771141052, "rewards_train/2-w": 1.221521019935608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.943267583847046, "rewards_train/margins_1": 0.7979964017868042, "rewards_train/margins_2": -0.3810059428215027, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -252.00244140625, "logps_train/policy_1_l": -170.5157470703125, "logps_train/policy_1_w": -135.26673889160156, "logps_train/policy_2_2": -200.09303283691406, "logps_train/policy_2_w": -174.10595703125, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.8935056924819946, "rewards_train/1-l": -1.4461052417755127, "rewards_train/1-w": 2.8092641830444336, "rewards_train/2-2": 2.7313218116760254, "rewards_train/2-w": 1.5042474269866943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.255369424819946, "rewards_train/margins_1": 1.915758490562439, "rewards_train/margins_2": 1.227074384689331, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -135.30682373046875, "logps_train/policy_1_l": -115.88734436035156, "logps_train/policy_1_w": -110.78356170654297, "logps_train/policy_2_2": -109.1545639038086, "logps_train/policy_2_w": -141.00689697265625, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.1165835857391357, "rewards_train/1-l": -1.225062370300293, "rewards_train/1-w": 1.320081353187561, "rewards_train/2-2": 1.8818089962005615, "rewards_train/2-w": 0.6164987087249756, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.545143723487854, "rewards_train/margins_1": 0.2034977674484253, "rewards_train/margins_2": 1.265310287475586, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -252.98300170898438, "logps_train/policy_1_l": -294.9058532714844, "logps_train/policy_1_w": -216.65542602539062, "logps_train/policy_2_2": -204.2933349609375, "logps_train/policy_2_w": -262.030029296875, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -258.0, "logps_train/ref_1_w": -255.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.207949638366699, "rewards_train/1-l": -3.6429271697998047, "rewards_train/1-w": 3.8641459941864014, "rewards_train/2-2": 3.4144158363342285, "rewards_train/2-w": 2.204806089401245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.507073163986206, "rewards_train/margins_1": 1.6561963558197021, "rewards_train/margins_2": 1.2096097469329834, "step": 278 }, { "epoch": 0.84, "logps_train/policy_1_2": -106.22587585449219, "logps_train/policy_1_l": -108.5159683227539, "logps_train/policy_1_w": -111.9915771484375, "logps_train/policy_2_2": -87.61882781982422, "logps_train/policy_2_w": -141.28372192382812, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0524121522903442, "rewards_train/1-l": -1.5803077220916748, "rewards_train/1-w": 2.55084228515625, "rewards_train/2-2": 1.672492265701294, "rewards_train/2-w": 1.6466286182403564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.131150007247925, "rewards_train/margins_1": 1.4984301328659058, "rewards_train/margins_2": 0.0258636474609375, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -265.03668212890625, "logps_train/policy_1_l": -165.97711181640625, "logps_train/policy_1_w": -192.27169799804688, "logps_train/policy_2_2": -227.74295043945312, "logps_train/policy_2_w": -223.72683715820312, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -231.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.5369573831558228, "rewards_train/1-l": -1.396343469619751, "rewards_train/1-w": 3.854081153869629, "rewards_train/2-2": 2.9811739921569824, "rewards_train/2-w": 2.4804420471191406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.25042462348938, "rewards_train/margins_1": 2.317123770713806, "rewards_train/margins_2": 0.5007319450378418, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -189.66477966308594, "logps_train/policy_1_l": -235.68836975097656, "logps_train/policy_1_w": -144.1913604736328, "logps_train/policy_2_2": -149.61874389648438, "logps_train/policy_2_w": -196.01171875, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.674147129058838, "rewards_train/1-l": -2.5157129764556885, "rewards_train/1-w": 2.7886767387390137, "rewards_train/2-2": 2.956875801086426, "rewards_train/2-w": 1.5707032680511475, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.304389715194702, "rewards_train/margins_1": 1.1145296096801758, "rewards_train/margins_2": 1.3861725330352783, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -189.90457153320312, "logps_train/policy_1_l": -175.01068115234375, "logps_train/policy_1_w": -101.65229797363281, "logps_train/policy_2_2": -152.90908813476562, "logps_train/policy_2_w": -133.017822265625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.1696999073028564, "rewards_train/1-l": -1.1927056312561035, "rewards_train/1-w": 2.001176357269287, "rewards_train/2-2": 1.8180760145187378, "rewards_train/2-w": 1.2923579216003418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1938819885253906, "rewards_train/margins_1": 0.8314764499664307, "rewards_train/margins_2": 0.525718092918396, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -104.59674072265625, "logps_train/policy_1_l": -120.16656494140625, "logps_train/policy_1_w": -73.1385269165039, "logps_train/policy_2_2": -92.68346405029297, "logps_train/policy_2_w": -81.03992462158203, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 1.2462835311889648, "rewards_train/1-l": -0.6561099290847778, "rewards_train/1-w": 1.1571433544158936, "rewards_train/2-2": 1.6031869649887085, "rewards_train/2-w": 0.7370723485946655, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8132532835006714, "rewards_train/margins_1": -0.08914017677307129, "rewards_train/margins_2": 0.866114616394043, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -180.92105102539062, "logps_train/policy_1_l": -180.79652404785156, "logps_train/policy_1_w": -86.24224090576172, "logps_train/policy_2_2": -156.21934509277344, "logps_train/policy_2_w": -98.51860046386719, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 1.8680498600006104, "rewards_train/1-l": -2.263636350631714, "rewards_train/1-w": 1.6617134809494019, "rewards_train/2-2": 2.8511130809783936, "rewards_train/2-w": 1.36981999874115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9253498315811157, "rewards_train/margins_1": -0.2063363790512085, "rewards_train/margins_2": 1.4812930822372437, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -63.064353942871094, "logps_train/policy_1_l": -125.76708984375, "logps_train/policy_1_w": -71.59869384765625, "logps_train/policy_2_2": -47.52571487426758, "logps_train/policy_2_w": -92.20816040039062, "logps_train/ref_1_2": -68.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -58.5, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 0.47549813985824585, "rewards_train/1-l": -1.7050297260284424, "rewards_train/1-w": 1.879974365234375, "rewards_train/2-2": 1.0740885734558105, "rewards_train/2-w": 1.2369967699050903, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.5850040912628174, "rewards_train/margins_1": 1.4044762253761292, "rewards_train/margins_2": -0.16290819644927979, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -91.75534057617188, "logps_train/policy_1_l": -75.86933898925781, "logps_train/policy_1_w": -76.84136962890625, "logps_train/policy_2_2": -76.8006362915039, "logps_train/policy_2_w": -98.92153930664062, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 1.3906774520874023, "rewards_train/1-l": -0.841231107711792, "rewards_train/1-w": 1.8619565963745117, "rewards_train/2-2": 1.9127098321914673, "rewards_train/2-w": 1.1109707355499268, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.7031877040863037, "rewards_train/margins_1": 0.4712791442871094, "rewards_train/margins_2": 0.8017390966415405, "step": 279 }, { "epoch": 0.84, "learning_rate": 3.3705040885859975e-06, "loss": 0.7462, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -109.16664123535156, "logps_train/policy_1_l": -94.28797912597656, "logps_train/policy_1_w": -75.76837158203125, "logps_train/policy_2_2": -96.21117401123047, "logps_train/policy_2_w": -91.706787109375, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 0.9784531593322754, "rewards_train/1-l": -0.8920794129371643, "rewards_train/1-w": 1.6044127941131592, "rewards_train/2-2": 1.2874765396118164, "rewards_train/2-w": 0.9535397887229919, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4964922070503235, "rewards_train/margins_1": 0.6259596347808838, "rewards_train/margins_2": 0.33393675088882446, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -209.8155517578125, "logps_train/policy_1_l": -171.424560546875, "logps_train/policy_1_w": -172.6147918701172, "logps_train/policy_2_2": -171.72665405273438, "logps_train/policy_2_w": -209.07774353027344, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.7317252159118652, "rewards_train/1-l": -1.5772228240966797, "rewards_train/1-w": 2.544771194458008, "rewards_train/2-2": 3.077725410461426, "rewards_train/2-w": 1.7578513622283936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1219940185546875, "rewards_train/margins_1": 0.8130459785461426, "rewards_train/margins_2": 1.3198740482330322, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -121.7634506225586, "logps_train/policy_1_l": -143.48825073242188, "logps_train/policy_1_w": -96.58628845214844, "logps_train/policy_2_2": -94.12370300292969, "logps_train/policy_2_w": -122.32060241699219, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.028733491897583, "rewards_train/1-l": -1.6290984153747559, "rewards_train/1-w": 1.6544568538665771, "rewards_train/2-2": 1.6313791275024414, "rewards_train/2-w": 1.3943073749542236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.283555269241333, "rewards_train/margins_1": 0.6257233619689941, "rewards_train/margins_2": 0.23707175254821777, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -84.3697738647461, "logps_train/policy_1_l": -48.94768142700195, "logps_train/policy_1_w": -56.05415344238281, "logps_train/policy_2_2": -68.68560791015625, "logps_train/policy_2_w": -67.68689727783203, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -42.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -74.5, "rewards_train/1-2": 0.6631202101707458, "rewards_train/1-l": -0.6318773031234741, "rewards_train/1-w": 1.0824756622314453, "rewards_train/2-2": 1.1356383562088013, "rewards_train/2-w": 0.6809197068214417, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.7143529653549194, "rewards_train/margins_1": 0.41935545206069946, "rewards_train/margins_2": 0.4547186493873596, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -122.84898376464844, "logps_train/policy_1_l": -132.66062927246094, "logps_train/policy_1_w": -77.17201232910156, "logps_train/policy_2_2": -105.02249908447266, "logps_train/policy_2_w": -104.76972198486328, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 1.2568989992141724, "rewards_train/1-l": -1.799803614616394, "rewards_train/1-w": 0.9980326890945435, "rewards_train/2-2": 1.8864223957061768, "rewards_train/2-w": 0.4966605007648468, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7978363037109375, "rewards_train/margins_1": -0.2588663101196289, "rewards_train/margins_2": 1.38976189494133, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -188.22747802734375, "logps_train/policy_1_l": -187.52639770507812, "logps_train/policy_1_w": -108.879638671875, "logps_train/policy_2_2": -147.58871459960938, "logps_train/policy_2_w": -142.32373046875, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2975633144378662, "rewards_train/1-l": -2.1268577575683594, "rewards_train/1-w": 1.4714109897613525, "rewards_train/2-2": 2.8387842178344727, "rewards_train/2-w": 0.7473130822181702, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.598268747329712, "rewards_train/margins_1": 0.17384767532348633, "rewards_train/margins_2": 2.0914711356163025, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -91.35379028320312, "logps_train/policy_1_l": -65.88571166992188, "logps_train/policy_1_w": -65.16643524169922, "logps_train/policy_2_2": -69.01640319824219, "logps_train/policy_2_w": -99.78082275390625, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -56.25, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 0.5669642686843872, "rewards_train/1-l": -0.9719699621200562, "rewards_train/1-w": 1.562262773513794, "rewards_train/2-2": 0.9237499237060547, "rewards_train/2-w": 0.5766057968139648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.53423273563385, "rewards_train/margins_1": 0.9952985048294067, "rewards_train/margins_2": 0.34714412689208984, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -164.28543090820312, "logps_train/policy_1_l": -79.935791015625, "logps_train/policy_1_w": -105.47726440429688, "logps_train/policy_2_2": -135.3642120361328, "logps_train/policy_2_w": -130.26560974121094, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.7679418921470642, "rewards_train/1-l": -0.7864997386932373, "rewards_train/1-w": 1.8050074577331543, "rewards_train/2-2": 1.9749071598052979, "rewards_train/2-w": 0.8296895027160645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.5915071964263916, "rewards_train/margins_1": 1.03706556558609, "rewards_train/margins_2": 1.1452176570892334, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -136.50706481933594, "logps_train/policy_1_l": -184.32395935058594, "logps_train/policy_1_w": -103.70236206054688, "logps_train/policy_2_2": -102.71278381347656, "logps_train/policy_2_w": -143.43374633789062, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.3977307081222534, "rewards_train/1-l": -1.8058332204818726, "rewards_train/1-w": 2.972928047180176, "rewards_train/2-2": 2.422471523284912, "rewards_train/2-w": 1.9472500085830688, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.778761267662048, "rewards_train/margins_1": 1.5751973390579224, "rewards_train/margins_2": 0.47522151470184326, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -188.04168701171875, "logps_train/policy_1_l": -218.6946563720703, "logps_train/policy_1_w": -196.70172119140625, "logps_train/policy_2_2": -157.5233154296875, "logps_train/policy_2_w": -239.15406799316406, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.171611785888672, "rewards_train/1-l": -2.0970046520233154, "rewards_train/1-w": 3.222015619277954, "rewards_train/2-2": 2.9472784996032715, "rewards_train/2-w": 1.5127171277999878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.3190202713012695, "rewards_train/margins_1": 1.0504038333892822, "rewards_train/margins_2": 1.4345613718032837, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -98.02694702148438, "logps_train/policy_1_l": -73.92582702636719, "logps_train/policy_1_w": -70.38287353515625, "logps_train/policy_2_2": -88.01416015625, "logps_train/policy_2_w": -94.70568084716797, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.49027419090271, "rewards_train/1-l": -1.282426357269287, "rewards_train/1-w": 1.6466245651245117, "rewards_train/2-2": 1.7310059070587158, "rewards_train/2-w": 1.2161507606506348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.929050922393799, "rewards_train/margins_1": 0.15635037422180176, "rewards_train/margins_2": 0.514855146408081, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -89.57350158691406, "logps_train/policy_1_l": -107.97481536865234, "logps_train/policy_1_w": -114.47392272949219, "logps_train/policy_2_2": -69.85450744628906, "logps_train/policy_2_w": -146.06515502929688, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.59968101978302, "rewards_train/1-l": -1.131075382232666, "rewards_train/1-w": 1.8690147399902344, "rewards_train/2-2": 1.2594717741012573, "rewards_train/2-w": 0.5591087937355042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0000901222229004, "rewards_train/margins_1": 1.2693337202072144, "rewards_train/margins_2": 0.7003629803657532, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -264.1907958984375, "logps_train/policy_1_l": -223.62799072265625, "logps_train/policy_1_w": -166.30979919433594, "logps_train/policy_2_2": -205.9416046142578, "logps_train/policy_2_w": -215.99656677246094, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 0.8137346506118774, "rewards_train/1-l": -2.083110809326172, "rewards_train/1-w": 2.878394842147827, "rewards_train/2-2": 3.426151990890503, "rewards_train/2-w": 1.2253429889678955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.961505651473999, "rewards_train/margins_1": 2.0646601915359497, "rewards_train/margins_2": 2.2008090019226074, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -86.33676147460938, "logps_train/policy_1_l": -88.41938781738281, "logps_train/policy_1_w": -72.54151916503906, "logps_train/policy_2_2": -65.83891296386719, "logps_train/policy_2_w": -90.74510192871094, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -104.5, "rewards_train/1-2": 1.4944491386413574, "rewards_train/1-l": -1.0460405349731445, "rewards_train/1-w": 1.927880048751831, "rewards_train/2-2": 1.9215772151947021, "rewards_train/2-w": 1.375880479812622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9739205837249756, "rewards_train/margins_1": 0.43343091011047363, "rewards_train/margins_2": 0.5456967353820801, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -128.7789306640625, "logps_train/policy_1_l": -106.01748657226562, "logps_train/policy_1_w": -126.38859558105469, "logps_train/policy_2_2": -94.93230438232422, "logps_train/policy_2_w": -164.65701293945312, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 0.9006234407424927, "rewards_train/1-l": -1.4809472560882568, "rewards_train/1-w": 2.517781972885132, "rewards_train/2-2": 2.292316436767578, "rewards_train/2-w": 1.199141025543213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9987292289733887, "rewards_train/margins_1": 1.6171585321426392, "rewards_train/margins_2": 1.0931754112243652, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -151.48513793945312, "logps_train/policy_1_l": -180.8825225830078, "logps_train/policy_1_w": -103.38031768798828, "logps_train/policy_2_2": -120.17155456542969, "logps_train/policy_2_w": -139.86386108398438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.151486873626709, "rewards_train/1-l": -2.6851277351379395, "rewards_train/1-w": 1.8982962369918823, "rewards_train/2-2": 1.9484691619873047, "rewards_train/2-w": 1.3214272260665894, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.583423972129822, "rewards_train/margins_1": 0.7468093633651733, "rewards_train/margins_2": 0.6270419359207153, "step": 281 }, { "epoch": 0.84, "learning_rate": 3.3473095425880795e-06, "loss": 0.8207, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -174.48204040527344, "logps_train/policy_1_l": -195.34033203125, "logps_train/policy_1_w": -175.48654174804688, "logps_train/policy_2_2": -140.3419952392578, "logps_train/policy_2_w": -216.0751953125, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.8236708641052246, "rewards_train/1-l": -1.455909252166748, "rewards_train/1-w": 2.285719871520996, "rewards_train/2-2": 2.6501755714416504, "rewards_train/2-w": 0.765918493270874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.741629123687744, "rewards_train/margins_1": 0.4620490074157715, "rewards_train/margins_2": 1.8842570781707764, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -120.73454284667969, "logps_train/policy_1_l": -102.45046997070312, "logps_train/policy_1_w": -56.226829528808594, "logps_train/policy_2_2": -89.09695434570312, "logps_train/policy_2_w": -88.0884017944336, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -70.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 1.9952964782714844, "rewards_train/1-l": -0.9097929000854492, "rewards_train/1-w": 1.432786226272583, "rewards_train/2-2": 2.984055280685425, "rewards_train/2-w": 0.506784975528717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.3425791263580322, "rewards_train/margins_1": -0.5625102519989014, "rewards_train/margins_2": 2.4772703051567078, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -183.60865783691406, "logps_train/policy_1_l": -156.39645385742188, "logps_train/policy_1_w": -181.0347137451172, "logps_train/policy_2_2": -140.82339477539062, "logps_train/policy_2_w": -230.05368041992188, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.1625717878341675, "rewards_train/1-l": -0.9773414134979248, "rewards_train/1-w": 2.2035601139068604, "rewards_train/2-2": 2.542269706726074, "rewards_train/2-w": 0.7915071249008179, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.180901527404785, "rewards_train/margins_1": 1.0409883260726929, "rewards_train/margins_2": 1.7507625818252563, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -135.24319458007812, "logps_train/policy_1_l": -108.48342895507812, "logps_train/policy_1_w": -80.31088256835938, "logps_train/policy_2_2": -110.85354614257812, "logps_train/policy_2_w": -93.135498046875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.6858357191085815, "rewards_train/1-l": -1.1815464496612549, "rewards_train/1-w": 2.0454745292663574, "rewards_train/2-2": 2.665426254272461, "rewards_train/2-w": 1.6212158203125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2270209789276123, "rewards_train/margins_1": 0.3596388101577759, "rewards_train/margins_2": 1.044210433959961, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -188.95913696289062, "logps_train/policy_1_l": -138.6885528564453, "logps_train/policy_1_w": -134.85885620117188, "logps_train/policy_2_2": -137.1574249267578, "logps_train/policy_2_w": -169.08016967773438, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.4892442226409912, "rewards_train/1-l": -1.7536213397979736, "rewards_train/1-w": 2.761477470397949, "rewards_train/2-2": 3.2006640434265137, "rewards_train/2-w": 1.2622956037521362, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.515098810195923, "rewards_train/margins_1": 1.272233247756958, "rewards_train/margins_2": 1.9383684396743774, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -110.3548812866211, "logps_train/policy_1_l": -127.58956909179688, "logps_train/policy_1_w": -117.7249984741211, "logps_train/policy_2_2": -90.58660125732422, "logps_train/policy_2_w": -139.05404663085938, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.464902400970459, "rewards_train/1-l": -0.7671598792076111, "rewards_train/1-w": 1.4292582273483276, "rewards_train/2-2": 1.9401681423187256, "rewards_train/2-w": 0.6074861288070679, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1964181065559387, "rewards_train/margins_1": -0.03564417362213135, "rewards_train/margins_2": 1.3326820135116577, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -134.83042907714844, "logps_train/policy_1_l": -125.45503997802734, "logps_train/policy_1_w": -47.49627685546875, "logps_train/policy_2_2": -111.17414855957031, "logps_train/policy_2_w": -69.02876281738281, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -62.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 1.505238652229309, "rewards_train/1-l": -1.4560508728027344, "rewards_train/1-w": 1.43552827835083, "rewards_train/2-2": 2.392350435256958, "rewards_train/2-w": 1.0147017240524292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8915791511535645, "rewards_train/margins_1": -0.069710373878479, "rewards_train/margins_2": 1.3776487112045288, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -128.9178466796875, "logps_train/policy_1_l": -150.66183471679688, "logps_train/policy_1_w": -55.301631927490234, "logps_train/policy_2_2": -98.88902282714844, "logps_train/policy_2_w": -78.16444396972656, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -91.0, "rewards_train/1-2": 1.0929800271987915, "rewards_train/1-l": -1.727708339691162, "rewards_train/1-w": 1.6113406419754028, "rewards_train/2-2": 2.0133442878723145, "rewards_train/2-w": 1.2416609525680542, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.339048981666565, "rewards_train/margins_1": 0.5183606147766113, "rewards_train/margins_2": 0.7716833353042603, "step": 282 }, { "epoch": 0.85, "logps_train/policy_1_2": -223.22540283203125, "logps_train/policy_1_l": -160.89918518066406, "logps_train/policy_1_w": -94.01085662841797, "logps_train/policy_2_2": -190.06939697265625, "logps_train/policy_2_w": -117.20968627929688, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.8883960247039795, "rewards_train/1-l": -2.077418565750122, "rewards_train/1-w": 2.1825079917907715, "rewards_train/2-2": 3.248530149459839, "rewards_train/2-w": 1.5141874551773071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.2599265575408936, "rewards_train/margins_1": 0.294111967086792, "rewards_train/margins_2": 1.7343426942825317, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -127.798583984375, "logps_train/policy_1_l": -115.36932373046875, "logps_train/policy_1_w": -95.60511779785156, "logps_train/policy_2_2": -107.26367950439453, "logps_train/policy_2_w": -107.75790405273438, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.9513914585113525, "rewards_train/1-l": -1.486102819442749, "rewards_train/1-w": 2.3004250526428223, "rewards_train/2-2": 2.0736327171325684, "rewards_train/2-w": 1.7413965463638306, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7865278720855713, "rewards_train/margins_1": 0.3490335941314697, "rewards_train/margins_2": 0.3322361707687378, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -238.5015869140625, "logps_train/policy_1_l": -288.43536376953125, "logps_train/policy_1_w": -135.75112915039062, "logps_train/policy_2_2": -191.1600341796875, "logps_train/policy_2_w": -170.22607421875, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -258.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.9529669284820557, "rewards_train/1-l": -3.074786424636841, "rewards_train/1-w": 2.4545745849609375, "rewards_train/2-2": 3.2644646167755127, "rewards_train/2-w": 1.7664544582366943, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.529361009597778, "rewards_train/margins_1": 0.5016076564788818, "rewards_train/margins_2": 1.4980101585388184, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -150.06747436523438, "logps_train/policy_1_l": -120.8991470336914, "logps_train/policy_1_w": -121.13967895507812, "logps_train/policy_2_2": -120.22040557861328, "logps_train/policy_2_w": -156.73822021484375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.0795809030532837, "rewards_train/1-l": -1.1666721105575562, "rewards_train/1-w": 2.818844795227051, "rewards_train/2-2": 2.2490532398223877, "rewards_train/2-w": 1.5914114713668823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.985516905784607, "rewards_train/margins_1": 1.739263892173767, "rewards_train/margins_2": 0.6576417684555054, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -91.49577331542969, "logps_train/policy_1_l": -156.148681640625, "logps_train/policy_1_w": -137.35488891601562, "logps_train/policy_2_2": -73.41372680664062, "logps_train/policy_2_w": -179.95196533203125, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.1379226446151733, "rewards_train/1-l": -1.2909421920776367, "rewards_train/1-w": 3.362948417663574, "rewards_train/2-2": 1.5523779392242432, "rewards_train/2-w": 2.0016794204711914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.653890609741211, "rewards_train/margins_1": 2.225025773048401, "rewards_train/margins_2": -0.44930148124694824, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -148.1197509765625, "logps_train/policy_1_l": -140.30996704101562, "logps_train/policy_1_w": -136.27349853515625, "logps_train/policy_2_2": -124.99185943603516, "logps_train/policy_2_w": -171.12991333007812, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.0194714069366455, "rewards_train/1-l": -1.5630278587341309, "rewards_train/1-w": 2.8210878372192383, "rewards_train/2-2": 2.8781819343566895, "rewards_train/2-w": 1.6776344776153564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.384115695953369, "rewards_train/margins_1": 0.8016164302825928, "rewards_train/margins_2": 1.200547456741333, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -258.790771484375, "logps_train/policy_1_l": -319.37725830078125, "logps_train/policy_1_w": -191.26980590820312, "logps_train/policy_2_2": -205.01815795898438, "logps_train/policy_2_w": -242.6763916015625, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.308423638343811, "rewards_train/1-l": -4.565069675445557, "rewards_train/1-w": 3.2917699813842773, "rewards_train/2-2": 3.648184299468994, "rewards_train/2-w": 1.6636104583740234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.856839656829834, "rewards_train/margins_1": 1.9833463430404663, "rewards_train/margins_2": 1.9845738410949707, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -81.92524719238281, "logps_train/policy_1_l": -135.23822021484375, "logps_train/policy_1_w": -97.94078063964844, "logps_train/policy_2_2": -71.65762329101562, "logps_train/policy_2_w": -110.62324523925781, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.4231010675430298, "rewards_train/1-l": -1.3349545001983643, "rewards_train/1-w": 2.324671745300293, "rewards_train/2-2": 0.8201751708984375, "rewards_train/2-w": 1.742362141609192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 3.6596262454986572, "rewards_train/margins_1": 1.9015706777572632, "rewards_train/margins_2": -0.9221869707107544, "step": 283 }, { "epoch": 0.85, "learning_rate": 3.32403230067252e-06, "loss": 0.7628, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -176.1693572998047, "logps_train/policy_1_l": -173.23602294921875, "logps_train/policy_1_w": -144.73776245117188, "logps_train/policy_2_2": -146.7958221435547, "logps_train/policy_2_w": -179.73953247070312, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.4221265316009521, "rewards_train/1-l": -1.9846858978271484, "rewards_train/1-w": 2.6432642936706543, "rewards_train/2-2": 2.208698272705078, "rewards_train/2-w": 1.3858131170272827, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.627950191497803, "rewards_train/margins_1": 1.2211377620697021, "rewards_train/margins_2": 0.8228851556777954, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -154.771728515625, "logps_train/policy_1_l": -186.62283325195312, "logps_train/policy_1_w": -129.48297119140625, "logps_train/policy_2_2": -115.60888671875, "logps_train/policy_2_w": -179.6973876953125, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.3126717805862427, "rewards_train/1-l": -2.260281562805176, "rewards_train/1-w": 2.7157652378082275, "rewards_train/2-2": 2.6234865188598633, "rewards_train/2-w": 1.5661983489990234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.976046800613403, "rewards_train/margins_1": 1.4030934572219849, "rewards_train/margins_2": 1.0572881698608398, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -122.12265014648438, "logps_train/policy_1_l": -123.25349426269531, "logps_train/policy_1_w": -80.74305725097656, "logps_train/policy_2_2": -96.04440307617188, "logps_train/policy_2_w": -106.50872802734375, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.9627345204353333, "rewards_train/1-l": -0.8651934862136841, "rewards_train/1-w": 1.2686623334884644, "rewards_train/2-2": 1.9279813766479492, "rewards_train/2-w": 0.43272098898887634, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1338558197021484, "rewards_train/margins_1": 0.3059278130531311, "rewards_train/margins_2": 1.4952603876590729, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -162.4751739501953, "logps_train/policy_1_l": -114.00834655761719, "logps_train/policy_1_w": -102.5101318359375, "logps_train/policy_2_2": -121.64031219482422, "logps_train/policy_2_w": -137.73092651367188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.1884198188781738, "rewards_train/1-l": -1.7504438161849976, "rewards_train/1-w": 2.5755491256713867, "rewards_train/2-2": 2.513312578201294, "rewards_train/2-w": 1.4222203493118286, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.325992941856384, "rewards_train/margins_1": 1.387129306793213, "rewards_train/margins_2": 1.0910922288894653, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -193.0595703125, "logps_train/policy_1_l": -188.73033142089844, "logps_train/policy_1_w": -193.25827026367188, "logps_train/policy_2_2": -158.04373168945312, "logps_train/policy_2_w": -237.77645874023438, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.9002933502197266, "rewards_train/1-l": -1.7183455228805542, "rewards_train/1-w": 3.096047878265381, "rewards_train/2-2": 3.1550025939941406, "rewards_train/2-w": 1.62235426902771, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.814393401145935, "rewards_train/margins_1": 1.1957545280456543, "rewards_train/margins_2": 1.5326483249664307, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -147.5653839111328, "logps_train/policy_1_l": -196.00616455078125, "logps_train/policy_1_w": -153.0774383544922, "logps_train/policy_2_2": -119.3832778930664, "logps_train/policy_2_w": -190.86837768554688, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.268460988998413, "rewards_train/1-l": -2.485576629638672, "rewards_train/1-w": 3.078585147857666, "rewards_train/2-2": 2.0605006217956543, "rewards_train/2-w": 1.7893333435058594, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.564161777496338, "rewards_train/margins_1": 1.810124158859253, "rewards_train/margins_2": 0.2711672782897949, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -159.49624633789062, "logps_train/policy_1_l": -191.34323120117188, "logps_train/policy_1_w": -137.18325805664062, "logps_train/policy_2_2": -109.00502014160156, "logps_train/policy_2_w": -182.49005126953125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.13240647315979, "rewards_train/1-l": -1.361373782157898, "rewards_train/1-w": 2.731968402862549, "rewards_train/2-2": 2.554966449737549, "rewards_train/2-w": 1.2455265522003174, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.093342185020447, "rewards_train/margins_1": 1.5995619297027588, "rewards_train/margins_2": 1.3094398975372314, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -142.86573791503906, "logps_train/policy_1_l": -116.60752868652344, "logps_train/policy_1_w": -162.44662475585938, "logps_train/policy_2_2": -121.95695495605469, "logps_train/policy_2_w": -181.2012939453125, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.4853007793426514, "rewards_train/1-l": -0.7971789240837097, "rewards_train/1-w": 2.105337142944336, "rewards_train/2-2": 2.0480544567108154, "rewards_train/2-w": 1.4423712491989136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9025160670280457, "rewards_train/margins_1": 0.6200363636016846, "rewards_train/margins_2": 0.6056832075119019, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -203.92372131347656, "logps_train/policy_1_l": -216.71742248535156, "logps_train/policy_1_w": -145.39459228515625, "logps_train/policy_2_2": -167.45428466796875, "logps_train/policy_2_w": -170.03530883789062, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.565440058708191, "rewards_train/1-l": -2.3276021480560303, "rewards_train/1-w": 1.698235273361206, "rewards_train/2-2": 3.00144624710083, "rewards_train/2-w": 1.1183438301086426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.025837421417236, "rewards_train/margins_1": 0.13279521465301514, "rewards_train/margins_2": 1.8831024169921875, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -227.97854614257812, "logps_train/policy_1_l": -262.4231262207031, "logps_train/policy_1_w": -142.98953247070312, "logps_train/policy_2_2": -197.27117919921875, "logps_train/policy_2_w": -175.2406005859375, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.8333948850631714, "rewards_train/1-l": -3.1501264572143555, "rewards_train/1-w": 3.0229225158691406, "rewards_train/2-2": 3.135382652282715, "rewards_train/2-w": 2.1290650367736816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.173048973083496, "rewards_train/margins_1": 1.1895276308059692, "rewards_train/margins_2": 1.0063176155090332, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -115.06349182128906, "logps_train/policy_1_l": -173.2742156982422, "logps_train/policy_1_w": -107.83955383300781, "logps_train/policy_2_2": -91.93757629394531, "logps_train/policy_2_w": -138.7033233642578, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.3827133178710938, "rewards_train/1-l": -1.4543745517730713, "rewards_train/1-w": 2.0734658241271973, "rewards_train/2-2": 2.048429489135742, "rewards_train/2-w": 1.2968549728393555, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5278403759002686, "rewards_train/margins_1": 0.6907525062561035, "rewards_train/margins_2": 0.7515745162963867, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -139.84268188476562, "logps_train/policy_1_l": -159.4585418701172, "logps_train/policy_1_w": -139.12973022460938, "logps_train/policy_2_2": -110.40218353271484, "logps_train/policy_2_w": -175.26568603515625, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.7501064538955688, "rewards_train/1-l": -1.3048384189605713, "rewards_train/1-w": 2.4621243476867676, "rewards_train/2-2": 2.569157123565674, "rewards_train/2-w": 1.2659122943878174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.766962766647339, "rewards_train/margins_1": 0.7120178937911987, "rewards_train/margins_2": 1.3032448291778564, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -191.1611328125, "logps_train/policy_1_l": -201.61544799804688, "logps_train/policy_1_w": -194.83648681640625, "logps_train/policy_2_2": -165.47308349609375, "logps_train/policy_2_w": -214.1700439453125, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 2.0588877201080322, "rewards_train/1-l": -2.180295705795288, "rewards_train/1-w": 3.745453119277954, "rewards_train/2-2": 2.6448779106140137, "rewards_train/2-w": 2.94158935546875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.925748825073242, "rewards_train/margins_1": 1.6865653991699219, "rewards_train/margins_2": -0.29671144485473633, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -71.55107116699219, "logps_train/policy_1_l": -159.44488525390625, "logps_train/policy_1_w": -92.47062683105469, "logps_train/policy_2_2": -56.46415710449219, "logps_train/policy_2_w": -127.29930877685547, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 0.8800486922264099, "rewards_train/1-l": -1.6687066555023193, "rewards_train/1-w": 2.195125102996826, "rewards_train/2-2": 1.2656936645507812, "rewards_train/2-w": 1.2138192653656006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8638317584991455, "rewards_train/margins_1": 1.3150764107704163, "rewards_train/margins_2": 0.051874399185180664, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -136.759521484375, "logps_train/policy_1_l": -60.30867385864258, "logps_train/policy_1_w": -95.69857788085938, "logps_train/policy_2_2": -100.3808822631836, "logps_train/policy_2_w": -131.2822723388672, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.8959226608276367, "rewards_train/1-l": -0.6002033352851868, "rewards_train/1-w": 2.7750635147094727, "rewards_train/2-2": 2.254098892211914, "rewards_train/2-w": 1.5334911346435547, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3752668499946594, "rewards_train/margins_1": 1.879140853881836, "rewards_train/margins_2": 0.7206077575683594, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -145.77320861816406, "logps_train/policy_1_l": -166.17453002929688, "logps_train/policy_1_w": -131.9271240234375, "logps_train/policy_2_2": -123.52912902832031, "logps_train/policy_2_w": -156.65931701660156, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.7554916143417358, "rewards_train/1-l": -1.4455788135528564, "rewards_train/1-w": 2.1799449920654297, "rewards_train/2-2": 2.306462287902832, "rewards_train/2-w": 1.3700060844421387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.625523805618286, "rewards_train/margins_1": 0.42445337772369385, "rewards_train/margins_2": 0.9364562034606934, "step": 285 }, { "epoch": 0.86, "learning_rate": 3.300674634657094e-06, "loss": 0.6494, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -74.50550842285156, "logps_train/policy_1_l": -96.60693359375, "logps_train/policy_1_w": -93.67342376708984, "logps_train/policy_2_2": -51.67629623413086, "logps_train/policy_2_w": -127.79741668701172, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.0306994915008545, "rewards_train/1-l": -1.2077641487121582, "rewards_train/1-w": 1.604386329650879, "rewards_train/2-2": 1.5956517457962036, "rewards_train/2-w": 0.7561962008476257, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.812150478363037, "rewards_train/margins_1": 0.5736868381500244, "rewards_train/margins_2": 0.8394555449485779, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -69.10420227050781, "logps_train/policy_1_l": -124.64192199707031, "logps_train/policy_1_w": -62.15043640136719, "logps_train/policy_2_2": -58.89448547363281, "logps_train/policy_2_w": -76.80333709716797, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.088994026184082, "rewards_train/1-l": -2.2886061668395996, "rewards_train/1-w": 2.538862705230713, "rewards_train/2-2": 1.5140670537948608, "rewards_train/2-w": 1.9994511604309082, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.8274688720703125, "rewards_train/margins_1": 1.4498686790466309, "rewards_train/margins_2": -0.48538410663604736, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -193.22598266601562, "logps_train/policy_1_l": -167.8675537109375, "logps_train/policy_1_w": -176.93246459960938, "logps_train/policy_2_2": -165.02574157714844, "logps_train/policy_2_w": -202.18359375, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.3633406162261963, "rewards_train/1-l": -1.4922245740890503, "rewards_train/1-w": 3.29815936088562, "rewards_train/2-2": 2.463832378387451, "rewards_train/2-w": 2.256641387939453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.79038393497467, "rewards_train/margins_1": 1.9348187446594238, "rewards_train/margins_2": 0.20719099044799805, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -199.2135772705078, "logps_train/policy_1_l": -208.5335693359375, "logps_train/policy_1_w": -152.15936279296875, "logps_train/policy_2_2": -150.40628051757812, "logps_train/policy_2_w": -191.82928466796875, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 0.22864334285259247, "rewards_train/1-l": -1.6439818143844604, "rewards_train/1-w": 2.405938148498535, "rewards_train/2-2": 1.721873164176941, "rewards_train/2-w": 1.5295716524124146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.049919962882996, "rewards_train/margins_1": 2.1772948056459427, "rewards_train/margins_2": 0.19230151176452637, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -117.86056518554688, "logps_train/policy_1_l": -128.9781494140625, "logps_train/policy_1_w": -100.51426696777344, "logps_train/policy_2_2": -92.19168090820312, "logps_train/policy_2_w": -123.99024963378906, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.8240996599197388, "rewards_train/1-l": -1.975158452987671, "rewards_train/1-w": 1.9087295532226562, "rewards_train/2-2": 1.5355188846588135, "rewards_train/2-w": 1.1666003465652466, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.883888006210327, "rewards_train/margins_1": 1.0846298933029175, "rewards_train/margins_2": 0.3689185380935669, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -161.7625732421875, "logps_train/policy_1_l": -156.37017822265625, "logps_train/policy_1_w": -130.39540100097656, "logps_train/policy_2_2": -127.84186553955078, "logps_train/policy_2_w": -173.102294921875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.5487436056137085, "rewards_train/1-l": -1.3479551076889038, "rewards_train/1-w": 3.0885848999023438, "rewards_train/2-2": 2.7720627784729004, "rewards_train/2-w": 1.7147696018218994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.436540007591248, "rewards_train/margins_1": 1.5398412942886353, "rewards_train/margins_2": 1.057293176651001, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -98.27593994140625, "logps_train/policy_1_l": -162.69595336914062, "logps_train/policy_1_w": -165.0201416015625, "logps_train/policy_2_2": -78.19741821289062, "logps_train/policy_2_w": -199.52777099609375, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.563812017440796, "rewards_train/1-l": -2.76080584526062, "rewards_train/1-w": 2.7901740074157715, "rewards_train/2-2": 2.0099451541900635, "rewards_train/2-w": 1.519099235534668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.550979852676392, "rewards_train/margins_1": 1.2263619899749756, "rewards_train/margins_2": 0.4908459186553955, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -72.30618286132812, "logps_train/policy_1_l": -97.71868896484375, "logps_train/policy_1_w": -63.32366943359375, "logps_train/policy_2_2": -61.29680252075195, "logps_train/policy_2_w": -81.32686614990234, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.090475082397461, "rewards_train/1-l": -1.096477746963501, "rewards_train/1-w": 1.4223206043243408, "rewards_train/2-2": 1.5054757595062256, "rewards_train/2-w": 1.0571569204330444, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.518798351287842, "rewards_train/margins_1": 0.3318455219268799, "rewards_train/margins_2": 0.44831883907318115, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -149.0794219970703, "logps_train/policy_1_l": -178.4414825439453, "logps_train/policy_1_w": -127.84298706054688, "logps_train/policy_2_2": -126.22323608398438, "logps_train/policy_2_w": -166.1186981201172, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.2639333009719849, "rewards_train/1-l": -2.9394607543945312, "rewards_train/1-w": 2.9516382217407227, "rewards_train/2-2": 2.0026774406433105, "rewards_train/2-w": 1.5506291389465332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.891098976135254, "rewards_train/margins_1": 1.6877049207687378, "rewards_train/margins_2": 0.45204830169677734, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -205.48463439941406, "logps_train/policy_1_l": -168.7666473388672, "logps_train/policy_1_w": -148.36172485351562, "logps_train/policy_2_2": -167.973876953125, "logps_train/policy_2_w": -202.0255126953125, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.7109110355377197, "rewards_train/1-l": -2.0491743087768555, "rewards_train/1-w": 3.320078134536743, "rewards_train/2-2": 3.071361780166626, "rewards_train/2-w": 1.6943244934082031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.369252443313599, "rewards_train/margins_1": 1.6091670989990234, "rewards_train/margins_2": 1.3770372867584229, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -189.54811096191406, "logps_train/policy_1_l": -219.35531616210938, "logps_train/policy_1_w": -176.3252410888672, "logps_train/policy_2_2": -161.2216033935547, "logps_train/policy_2_w": -202.8501739501953, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.7975332736968994, "rewards_train/1-l": -2.548349380493164, "rewards_train/1-w": 2.1471636295318604, "rewards_train/2-2": 2.6415114402770996, "rewards_train/2-w": 1.3046302795410156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.695513010025024, "rewards_train/margins_1": 0.34963035583496094, "rewards_train/margins_2": 1.336881160736084, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -204.18292236328125, "logps_train/policy_1_l": -198.52175903320312, "logps_train/policy_1_w": -148.6434326171875, "logps_train/policy_2_2": -170.74539184570312, "logps_train/policy_2_w": -177.7900848388672, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.447723627090454, "rewards_train/1-l": -2.1210238933563232, "rewards_train/1-w": 2.470031499862671, "rewards_train/2-2": 2.3907930850982666, "rewards_train/2-w": 1.42177152633667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.591055393218994, "rewards_train/margins_1": 1.0223078727722168, "rewards_train/margins_2": 0.9690215587615967, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -92.87710571289062, "logps_train/policy_1_l": -123.66383361816406, "logps_train/policy_1_w": -80.28986358642578, "logps_train/policy_2_2": -75.18102264404297, "logps_train/policy_2_w": -107.44725036621094, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.4458835124969482, "rewards_train/1-l": -0.9382578730583191, "rewards_train/1-w": 1.5483578443527222, "rewards_train/2-2": 2.026819944381714, "rewards_train/2-w": 0.5306652188301086, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4866157174110413, "rewards_train/margins_1": 0.10247433185577393, "rewards_train/margins_2": 1.4961547255516052, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -146.4478759765625, "logps_train/policy_1_l": -141.76963806152344, "logps_train/policy_1_w": -141.8832550048828, "logps_train/policy_2_2": -115.31636810302734, "logps_train/policy_2_w": -172.1598663330078, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.3403692245483398, "rewards_train/1-l": -1.5937612056732178, "rewards_train/1-w": 2.1409709453582764, "rewards_train/2-2": 2.407425880432129, "rewards_train/2-w": 1.2746384143829346, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.734732151031494, "rewards_train/margins_1": 0.8006017208099365, "rewards_train/margins_2": 1.1327874660491943, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -88.04436492919922, "logps_train/policy_1_l": -96.37047576904297, "logps_train/policy_1_w": -52.753273010253906, "logps_train/policy_2_2": -72.19806671142578, "logps_train/policy_2_w": -68.63545989990234, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 1.0869698524475098, "rewards_train/1-l": -1.4226922988891602, "rewards_train/1-w": 1.6059224605560303, "rewards_train/2-2": 1.5137873888015747, "rewards_train/2-w": 0.9872352480888367, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0286147594451904, "rewards_train/margins_1": 0.5189526081085205, "rewards_train/margins_2": 0.526552140712738, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -94.76578521728516, "logps_train/policy_1_l": -106.07426452636719, "logps_train/policy_1_w": -89.97550201416016, "logps_train/policy_2_2": -76.96131896972656, "logps_train/policy_2_w": -110.6507339477539, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.917172372341156, "rewards_train/1-l": -0.6855509281158447, "rewards_train/1-w": 2.0801844596862793, "rewards_train/2-2": 1.5413686037063599, "rewards_train/2-w": 1.293520450592041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.765735387802124, "rewards_train/margins_1": 1.1630120873451233, "rewards_train/margins_2": 0.24784815311431885, "step": 287 }, { "epoch": 0.86, "learning_rate": 3.2772388242088283e-06, "loss": 0.6838, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -212.82696533203125, "logps_train/policy_1_l": -181.20045471191406, "logps_train/policy_1_w": -142.2259521484375, "logps_train/policy_2_2": -170.71792602539062, "logps_train/policy_2_w": -176.72689819335938, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.7204289436340332, "rewards_train/1-l": -2.589186668395996, "rewards_train/1-w": 2.87271785736084, "rewards_train/2-2": 3.3711767196655273, "rewards_train/2-w": 1.7460591793060303, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.461904525756836, "rewards_train/margins_1": 1.1522889137268066, "rewards_train/margins_2": 1.625117540359497, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -193.36709594726562, "logps_train/policy_1_l": -200.98565673828125, "logps_train/policy_1_w": -168.41610717773438, "logps_train/policy_2_2": -161.76504516601562, "logps_train/policy_2_w": -211.76483154296875, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.9945414066314697, "rewards_train/1-l": -2.0954408645629883, "rewards_train/1-w": 2.6076080799102783, "rewards_train/2-2": 3.246931791305542, "rewards_train/2-w": 0.7352345585823059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.703048944473267, "rewards_train/margins_1": 0.6130666732788086, "rewards_train/margins_2": 2.511697232723236, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -205.99078369140625, "logps_train/policy_1_l": -205.91714477539062, "logps_train/policy_1_w": -198.528076171875, "logps_train/policy_2_2": -160.732666015625, "logps_train/policy_2_w": -265.6443176269531, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.707172155380249, "rewards_train/1-l": -2.8760898113250732, "rewards_train/1-w": 3.0440664291381836, "rewards_train/2-2": 2.8704833984375, "rewards_train/2-w": 1.129319190979004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.920156240463257, "rewards_train/margins_1": 1.3368942737579346, "rewards_train/margins_2": 1.741164207458496, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -123.72895812988281, "logps_train/policy_1_l": -141.8461456298828, "logps_train/policy_1_w": -93.72796630859375, "logps_train/policy_2_2": -93.99327850341797, "logps_train/policy_2_w": -121.09295654296875, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.0864797830581665, "rewards_train/1-l": -2.3050246238708496, "rewards_train/1-w": 2.1958560943603516, "rewards_train/2-2": 1.9092659950256348, "rewards_train/2-w": 1.8043769598007202, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.500880718231201, "rewards_train/margins_1": 1.109376311302185, "rewards_train/margins_2": 0.10488903522491455, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -163.39199829101562, "logps_train/policy_1_l": -176.36260986328125, "logps_train/policy_1_w": -164.8578338623047, "logps_train/policy_2_2": -126.28451538085938, "logps_train/policy_2_w": -203.79544067382812, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.0451737642288208, "rewards_train/1-l": -2.686260461807251, "rewards_train/1-w": 3.2134342193603516, "rewards_train/2-2": 2.1504547595977783, "rewards_train/2-w": 1.6278774738311768, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.8996946811676025, "rewards_train/margins_1": 2.1682604551315308, "rewards_train/margins_2": 0.5225772857666016, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -116.85101318359375, "logps_train/policy_1_l": -94.46383666992188, "logps_train/policy_1_w": -102.83134460449219, "logps_train/policy_2_2": -84.70295715332031, "logps_train/policy_2_w": -130.06631469726562, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.4898979663848877, "rewards_train/1-l": -1.12392258644104, "rewards_train/1-w": 1.7418653964996338, "rewards_train/2-2": 2.4144697189331055, "rewards_train/2-w": 0.883994460105896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.865787982940674, "rewards_train/margins_1": 0.2519674301147461, "rewards_train/margins_2": 1.5304752588272095, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -163.0097198486328, "logps_train/policy_1_l": -135.89508056640625, "logps_train/policy_1_w": -144.98924255371094, "logps_train/policy_2_2": -127.79032897949219, "logps_train/policy_2_w": -198.04736328125, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.163090705871582, "rewards_train/1-l": -1.375055193901062, "rewards_train/1-w": 3.2006845474243164, "rewards_train/2-2": 3.2686233520507812, "rewards_train/2-w": 1.4175307750701904, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.575739741325378, "rewards_train/margins_1": 1.0375938415527344, "rewards_train/margins_2": 1.8510925769805908, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -111.51469421386719, "logps_train/policy_1_l": -139.1232452392578, "logps_train/policy_1_w": -96.07232666015625, "logps_train/policy_2_2": -89.88353729248047, "logps_train/policy_2_w": -124.76144409179688, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.4356393814086914, "rewards_train/1-l": -1.9004113674163818, "rewards_train/1-w": 1.995891809463501, "rewards_train/2-2": 2.1585211753845215, "rewards_train/2-w": 1.2801051139831543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.896303176879883, "rewards_train/margins_1": 0.5602524280548096, "rewards_train/margins_2": 0.8784160614013672, "step": 288 }, { "epoch": 0.87, "logps_train/policy_1_2": -199.54547119140625, "logps_train/policy_1_l": -222.55662536621094, "logps_train/policy_1_w": -135.2687530517578, "logps_train/policy_2_2": -157.30755615234375, "logps_train/policy_2_w": -193.995849609375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.0438902378082275, "rewards_train/1-l": -1.7962868213653564, "rewards_train/1-w": 2.963749408721924, "rewards_train/2-2": 2.308501958847046, "rewards_train/2-w": 1.6785390377044678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.76003623008728, "rewards_train/margins_1": 1.9198591709136963, "rewards_train/margins_2": 0.6299629211425781, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -109.79637145996094, "logps_train/policy_1_l": -147.60183715820312, "logps_train/policy_1_w": -100.38995361328125, "logps_train/policy_2_2": -87.37548828125, "logps_train/policy_2_w": -136.84939575195312, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.368409514427185, "rewards_train/1-l": -1.8814733028411865, "rewards_train/1-w": 1.9969425201416016, "rewards_train/2-2": 2.1095213890075684, "rewards_train/2-w": 1.1291229724884033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.878415822982788, "rewards_train/margins_1": 0.6285330057144165, "rewards_train/margins_2": 0.980398416519165, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -120.42384338378906, "logps_train/policy_1_l": -113.01179504394531, "logps_train/policy_1_w": -133.89456176757812, "logps_train/policy_2_2": -90.91131591796875, "logps_train/policy_2_w": -161.35142517089844, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.676365613937378, "rewards_train/1-l": -1.015730857849121, "rewards_train/1-w": 2.528512716293335, "rewards_train/2-2": 2.1041810512542725, "rewards_train/2-w": 1.646107792854309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.544243574142456, "rewards_train/margins_1": 0.852147102355957, "rewards_train/margins_2": 0.4580732583999634, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -91.10517120361328, "logps_train/policy_1_l": -93.02389526367188, "logps_train/policy_1_w": -60.013248443603516, "logps_train/policy_2_2": -71.6008529663086, "logps_train/policy_2_w": -79.28218841552734, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -86.5, "rewards_train/1-2": 1.4113578796386719, "rewards_train/1-l": -1.3937963247299194, "rewards_train/1-w": 1.3584407567977905, "rewards_train/2-2": 1.9789772033691406, "rewards_train/2-w": 0.7483437061309814, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.75223708152771, "rewards_train/margins_1": -0.05291712284088135, "rewards_train/margins_2": 1.2306334972381592, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -106.33819580078125, "logps_train/policy_1_l": -171.4909210205078, "logps_train/policy_1_w": -111.66621398925781, "logps_train/policy_2_2": -82.72119140625, "logps_train/policy_2_w": -150.90380859375, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5286805629730225, "rewards_train/1-l": -1.8131545782089233, "rewards_train/1-w": 2.3700966835021973, "rewards_train/2-2": 2.231006383895874, "rewards_train/2-w": 1.6635265350341797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.183251261711121, "rewards_train/margins_1": 0.8414161205291748, "rewards_train/margins_2": 0.5674798488616943, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -149.365966796875, "logps_train/policy_1_l": -168.5255126953125, "logps_train/policy_1_w": -102.53263854980469, "logps_train/policy_2_2": -121.31025695800781, "logps_train/policy_2_w": -143.85105895996094, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.866527795791626, "rewards_train/1-l": -2.051086187362671, "rewards_train/1-w": 1.7631422281265259, "rewards_train/2-2": 1.8361623287200928, "rewards_train/2-w": 0.6734880805015564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8142284154891968, "rewards_train/margins_1": 0.8966144323348999, "rewards_train/margins_2": 1.1626742482185364, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -119.77542114257812, "logps_train/policy_1_l": -127.98442077636719, "logps_train/policy_1_w": -94.7673568725586, "logps_train/policy_2_2": -93.0101318359375, "logps_train/policy_2_w": -118.86503601074219, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 1.2943326234817505, "rewards_train/1-l": -1.1853065490722656, "rewards_train/1-w": 1.5513888597488403, "rewards_train/2-2": 2.1278929710388184, "rewards_train/2-w": 0.653731107711792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.736695408821106, "rewards_train/margins_1": 0.25705623626708984, "rewards_train/margins_2": 1.4741618633270264, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -158.640625, "logps_train/policy_1_l": -173.40802001953125, "logps_train/policy_1_w": -144.3840789794922, "logps_train/policy_2_2": -125.21340942382812, "logps_train/policy_2_w": -186.36981201171875, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.7906245589256287, "rewards_train/1-l": -2.020488739013672, "rewards_train/1-w": 3.2334675788879395, "rewards_train/2-2": 2.0669398307800293, "rewards_train/2-w": 1.9192689657211304, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.253956317901611, "rewards_train/margins_1": 2.442843019962311, "rewards_train/margins_2": 0.14767086505889893, "step": 289 }, { "epoch": 0.87, "learning_rate": 3.253727156621508e-06, "loss": 0.5682, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -173.61228942871094, "logps_train/policy_1_l": -218.76332092285156, "logps_train/policy_1_w": -182.39047241210938, "logps_train/policy_2_2": -134.32376098632812, "logps_train/policy_2_w": -240.27325439453125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 1.5989270210266113, "rewards_train/1-l": -2.2083632946014404, "rewards_train/1-w": 3.658608913421631, "rewards_train/2-2": 2.627779722213745, "rewards_train/2-w": 1.339863657951355, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.866972208023071, "rewards_train/margins_1": 2.0596818923950195, "rewards_train/margins_2": 1.2879160642623901, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -124.57666015625, "logps_train/policy_1_l": -114.42382049560547, "logps_train/policy_1_w": -141.2554931640625, "logps_train/policy_2_2": -109.34591674804688, "logps_train/policy_2_w": -174.643798828125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.3552250862121582, "rewards_train/1-l": -1.3271476030349731, "rewards_train/1-w": 2.7549195289611816, "rewards_train/2-2": 1.8429467678070068, "rewards_train/2-w": 1.5160887241363525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.082067131996155, "rewards_train/margins_1": 1.3996944427490234, "rewards_train/margins_2": 0.3268580436706543, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -231.3019561767578, "logps_train/policy_1_l": -206.24713134765625, "logps_train/policy_1_w": -213.6691436767578, "logps_train/policy_2_2": -186.64755249023438, "logps_train/policy_2_w": -256.4464416503906, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.4948046207427979, "rewards_train/1-l": -2.3229546546936035, "rewards_train/1-w": 2.5065228939056396, "rewards_train/2-2": 3.2039942741394043, "rewards_train/2-w": 0.6912935972213745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.829477548599243, "rewards_train/margins_1": 1.0117182731628418, "rewards_train/margins_2": 2.51270067691803, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -144.7421417236328, "logps_train/policy_1_l": -189.06503295898438, "logps_train/policy_1_w": -121.40744018554688, "logps_train/policy_2_2": -123.46671295166016, "logps_train/policy_2_w": -155.5010986328125, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.3656290769577026, "rewards_train/1-l": -2.231503486633301, "rewards_train/1-w": 2.4272241592407227, "rewards_train/2-2": 1.9197347164154053, "rewards_train/2-w": 1.5139522552490234, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.658727645874023, "rewards_train/margins_1": 1.06159508228302, "rewards_train/margins_2": 0.40578246116638184, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -212.88526916503906, "logps_train/policy_1_l": -213.17129516601562, "logps_train/policy_1_w": -169.92013549804688, "logps_train/policy_2_2": -164.43310546875, "logps_train/policy_2_w": -225.52328491210938, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.2489737272262573, "rewards_train/1-l": -2.687004566192627, "rewards_train/1-w": 3.63376784324646, "rewards_train/2-2": 3.0379390716552734, "rewards_train/2-w": 1.4922020435333252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.320772409439087, "rewards_train/margins_1": 2.3847941160202026, "rewards_train/margins_2": 1.5457370281219482, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -134.24893188476562, "logps_train/policy_1_l": -113.64173889160156, "logps_train/policy_1_w": -77.87215423583984, "logps_train/policy_2_2": -101.68698120117188, "logps_train/policy_2_w": -105.07979583740234, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 0.700107753276825, "rewards_train/1-l": -1.6780411005020142, "rewards_train/1-w": 1.7209875583648682, "rewards_train/2-2": 1.8938015699386597, "rewards_train/2-w": 0.7361609935760498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3990286588668823, "rewards_train/margins_1": 1.0208798050880432, "rewards_train/margins_2": 1.1576405763626099, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -231.50820922851562, "logps_train/policy_1_l": -203.87298583984375, "logps_train/policy_1_w": -154.34906005859375, "logps_train/policy_2_2": -188.43988037109375, "logps_train/policy_2_w": -195.19122314453125, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.5429301261901855, "rewards_train/1-l": -1.8466734886169434, "rewards_train/1-w": 3.154938220977783, "rewards_train/2-2": 3.4497623443603516, "rewards_train/2-w": 2.0121283531188965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.001611709594727, "rewards_train/margins_1": 1.6120080947875977, "rewards_train/margins_2": 1.437633991241455, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -115.728759765625, "logps_train/policy_1_l": -112.60560607910156, "logps_train/policy_1_w": -128.04440307617188, "logps_train/policy_2_2": -101.69403076171875, "logps_train/policy_2_w": -147.63829040527344, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.4989993572235107, "rewards_train/1-l": -0.7261862754821777, "rewards_train/1-w": 1.5549354553222656, "rewards_train/2-2": 1.8415346145629883, "rewards_train/2-w": 1.072108507156372, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2811217308044434, "rewards_train/margins_1": 0.05593609809875488, "rewards_train/margins_2": 0.7694261074066162, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -158.47740173339844, "logps_train/policy_1_l": -133.04855346679688, "logps_train/policy_1_w": -133.72857666015625, "logps_train/policy_2_2": -124.51676940917969, "logps_train/policy_2_w": -171.8021240234375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.5428847670555115, "rewards_train/1-l": -1.284542441368103, "rewards_train/1-w": 2.402923583984375, "rewards_train/2-2": 2.128011465072632, "rewards_train/2-w": 0.8408816456794739, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.687466025352478, "rewards_train/margins_1": 1.8600388169288635, "rewards_train/margins_2": 1.287129819393158, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -117.00015258789062, "logps_train/policy_1_l": -169.386962890625, "logps_train/policy_1_w": -128.06813049316406, "logps_train/policy_2_2": -103.85836791992188, "logps_train/policy_2_w": -152.20538330078125, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.5921725034713745, "rewards_train/1-l": -1.5699470043182373, "rewards_train/1-w": 1.8651591539382935, "rewards_train/2-2": 1.9524445533752441, "rewards_train/2-w": 1.1535825729370117, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4351061582565308, "rewards_train/margins_1": 0.27298665046691895, "rewards_train/margins_2": 0.7988619804382324, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -97.81877136230469, "logps_train/policy_1_l": -117.56874084472656, "logps_train/policy_1_w": -103.58087158203125, "logps_train/policy_2_2": -68.0609359741211, "logps_train/policy_2_w": -141.08340454101562, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.050934910774231, "rewards_train/1-l": -1.6670302152633667, "rewards_train/1-w": 1.348944902420044, "rewards_train/2-2": 1.6564064025878906, "rewards_train/2-w": 0.23697099089622498, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0159751176834106, "rewards_train/margins_1": 0.298009991645813, "rewards_train/margins_2": 1.4194354116916656, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -113.04307556152344, "logps_train/policy_1_l": -140.11973571777344, "logps_train/policy_1_w": -90.19760131835938, "logps_train/policy_2_2": -89.45942687988281, "logps_train/policy_2_w": -113.0618667602539, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.8253803849220276, "rewards_train/1-l": -1.2442493438720703, "rewards_train/1-w": 1.5454738140106201, "rewards_train/2-2": 1.5924360752105713, "rewards_train/2-w": 0.7332664132118225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7897231578826904, "rewards_train/margins_1": 0.7200934290885925, "rewards_train/margins_2": 0.8591696619987488, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -152.13514709472656, "logps_train/policy_1_l": -129.44354248046875, "logps_train/policy_1_w": -167.6044158935547, "logps_train/policy_2_2": -122.65636444091797, "logps_train/policy_2_w": -201.93380737304688, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.9614851474761963, "rewards_train/1-l": -1.3123221397399902, "rewards_train/1-w": 2.208308458328247, "rewards_train/2-2": 2.9281134605407715, "rewards_train/2-w": 1.0269317626953125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5206305980682373, "rewards_train/margins_1": 0.24682331085205078, "rewards_train/margins_2": 1.901181697845459, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -179.35174560546875, "logps_train/policy_1_l": -161.74365234375, "logps_train/policy_1_w": -127.40716552734375, "logps_train/policy_2_2": -149.0260772705078, "logps_train/policy_2_w": -156.15142822265625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.3101389408111572, "rewards_train/1-l": -1.754052996635437, "rewards_train/1-w": 2.1879942417144775, "rewards_train/2-2": 2.3036422729492188, "rewards_train/2-w": 1.0899351835250854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9420472383499146, "rewards_train/margins_1": 0.8778553009033203, "rewards_train/margins_2": 1.2137070894241333, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -195.42495727539062, "logps_train/policy_1_l": -177.99484252929688, "logps_train/policy_1_w": -169.1866455078125, "logps_train/policy_2_2": -167.39097595214844, "logps_train/policy_2_w": -208.1475830078125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.3418803215026855, "rewards_train/1-l": -1.3940162658691406, "rewards_train/1-w": 2.462585687637329, "rewards_train/2-2": 2.426527500152588, "rewards_train/2-w": 1.425865888595581, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8566019535064697, "rewards_train/margins_1": 1.1207053661346436, "rewards_train/margins_2": 1.0006616115570068, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -88.51656341552734, "logps_train/policy_1_l": -86.0118179321289, "logps_train/policy_1_w": -93.27474975585938, "logps_train/policy_2_2": -72.16458129882812, "logps_train/policy_2_w": -116.1613540649414, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8182657957077026, "rewards_train/1-l": -0.932041585445404, "rewards_train/1-w": 1.4787743091583252, "rewards_train/2-2": 1.1101044416427612, "rewards_train/2-w": 0.7182394862174988, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.4108158946037292, "rewards_train/margins_1": 0.6605085134506226, "rewards_train/margins_2": 0.39186495542526245, "step": 291 }, { "epoch": 0.87, "learning_rate": 3.23014192659244e-06, "loss": 0.7269, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -201.92811584472656, "logps_train/policy_1_l": -166.73043823242188, "logps_train/policy_1_w": -149.48800659179688, "logps_train/policy_2_2": -149.4168701171875, "logps_train/policy_2_w": -194.21853637695312, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.1353121995925903, "rewards_train/1-l": -1.5503886938095093, "rewards_train/1-w": 3.0426058769226074, "rewards_train/2-2": 3.295811891555786, "rewards_train/2-w": 1.6093958616256714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.592994570732117, "rewards_train/margins_1": 1.907293677330017, "rewards_train/margins_2": 1.6864160299301147, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -152.49734497070312, "logps_train/policy_1_l": -168.38186645507812, "logps_train/policy_1_w": -131.47320556640625, "logps_train/policy_2_2": -105.24365997314453, "logps_train/policy_2_w": -188.04074096679688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.4065148830413818, "rewards_train/1-l": -2.2170920372009277, "rewards_train/1-w": 2.318305492401123, "rewards_train/2-2": 2.3350093364715576, "rewards_train/2-w": 1.1709256172180176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.535397529602051, "rewards_train/margins_1": 0.9117906093597412, "rewards_train/margins_2": 1.16408371925354, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -146.97735595703125, "logps_train/policy_1_l": -144.79196166992188, "logps_train/policy_1_w": -122.92848205566406, "logps_train/policy_2_2": -124.38187408447266, "logps_train/policy_2_w": -139.55950927734375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.7257018685340881, "rewards_train/1-l": -1.0620074272155762, "rewards_train/1-w": 1.2829324007034302, "rewards_train/2-2": 1.7004843950271606, "rewards_train/2-w": 0.8745179772377014, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3449398279190063, "rewards_train/margins_1": 0.557230532169342, "rewards_train/margins_2": 0.8259664177894592, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -208.44631958007812, "logps_train/policy_1_l": -199.03375244140625, "logps_train/policy_1_w": -129.96701049804688, "logps_train/policy_2_2": -167.61856079101562, "logps_train/policy_2_w": -170.40057373046875, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.3061492443084717, "rewards_train/1-l": -1.5044506788253784, "rewards_train/1-w": 2.8513453006744385, "rewards_train/2-2": 2.5607995986938477, "rewards_train/2-w": 1.6943174600601196, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.355795979499817, "rewards_train/margins_1": 1.5451960563659668, "rewards_train/margins_2": 0.866482138633728, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -137.64720153808594, "logps_train/policy_1_l": -137.2720947265625, "logps_train/policy_1_w": -101.84295654296875, "logps_train/policy_2_2": -122.87506866455078, "logps_train/policy_2_w": -117.17723846435547, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.6243422031402588, "rewards_train/1-l": -1.3004767894744873, "rewards_train/1-w": 1.7166557312011719, "rewards_train/2-2": 2.056243419647217, "rewards_train/2-w": 1.2822766304016113, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.017132520675659, "rewards_train/margins_1": 0.09231352806091309, "rewards_train/margins_2": 0.7739667892456055, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -124.53443908691406, "logps_train/policy_1_l": -107.42617797851562, "logps_train/policy_1_w": -151.5552520751953, "logps_train/policy_2_2": -105.08515930175781, "logps_train/policy_2_w": -178.21543884277344, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.4449939727783203, "rewards_train/1-l": -0.937198281288147, "rewards_train/1-w": 2.8647875785827637, "rewards_train/2-2": 1.8567919731140137, "rewards_train/2-w": 1.8518930673599243, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8019858598709106, "rewards_train/margins_1": 1.4197936058044434, "rewards_train/margins_2": 0.0048989057540893555, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -98.67039489746094, "logps_train/policy_1_l": -112.44010925292969, "logps_train/policy_1_w": -86.53982543945312, "logps_train/policy_2_2": -85.47310638427734, "logps_train/policy_2_w": -98.16661071777344, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -110.5, "rewards_train/1-2": 1.4153822660446167, "rewards_train/1-l": -1.179557204246521, "rewards_train/1-w": 1.6628146171569824, "rewards_train/2-2": 1.8925334215164185, "rewards_train/2-w": 1.2319717407226562, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8423718214035034, "rewards_train/margins_1": 0.24743235111236572, "rewards_train/margins_2": 0.6605616807937622, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -142.0279083251953, "logps_train/policy_1_l": -188.03152465820312, "logps_train/policy_1_w": -181.09442138671875, "logps_train/policy_2_2": -110.78787231445312, "logps_train/policy_2_w": -234.01559448242188, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.6925220489501953, "rewards_train/1-l": -1.891432762145996, "rewards_train/1-w": 2.9085278511047363, "rewards_train/2-2": 2.763400077819824, "rewards_train/2-w": 1.312503695487976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.799960613250732, "rewards_train/margins_1": 1.216005802154541, "rewards_train/margins_2": 1.4508963823318481, "step": 292 }, { "epoch": 0.88, "logps_train/policy_1_2": -192.29835510253906, "logps_train/policy_1_l": -209.7962646484375, "logps_train/policy_1_w": -148.62496948242188, "logps_train/policy_2_2": -145.48870849609375, "logps_train/policy_2_w": -207.76947021484375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4357898235321045, "rewards_train/1-l": -1.9296258687973022, "rewards_train/1-w": 3.312502861022949, "rewards_train/2-2": 2.896441698074341, "rewards_train/2-w": 1.1011782884597778, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.2421287298202515, "rewards_train/margins_1": 1.8767130374908447, "rewards_train/margins_2": 1.795263409614563, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -193.61700439453125, "logps_train/policy_1_l": -291.00030517578125, "logps_train/policy_1_w": -196.9400634765625, "logps_train/policy_2_2": -169.42625427246094, "logps_train/policy_2_w": -229.674560546875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -252.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.539860486984253, "rewards_train/1-l": -3.9250288009643555, "rewards_train/1-w": 2.3966188430786133, "rewards_train/2-2": 2.3386242389678955, "rewards_train/2-w": 1.1716067790985107, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.321647644042969, "rewards_train/margins_1": 0.8567583560943604, "rewards_train/margins_2": 1.1670174598693848, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -149.8868865966797, "logps_train/policy_1_l": -154.44778442382812, "logps_train/policy_1_w": -141.11322021484375, "logps_train/policy_2_2": -98.06251525878906, "logps_train/policy_2_w": -184.293212890625, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.6206860542297363, "rewards_train/1-l": -1.3481959104537964, "rewards_train/1-w": 2.7918028831481934, "rewards_train/2-2": 2.824998378753662, "rewards_train/2-w": 1.7988033294677734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.13999879360199, "rewards_train/margins_1": 1.171116828918457, "rewards_train/margins_2": 1.0261950492858887, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -192.37158203125, "logps_train/policy_1_l": -112.63485717773438, "logps_train/policy_1_w": -158.63092041015625, "logps_train/policy_2_2": -148.72454833984375, "logps_train/policy_2_w": -195.02154541015625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 0.35502904653549194, "rewards_train/1-l": -0.8683683276176453, "rewards_train/1-w": 2.905658006668091, "rewards_train/2-2": 1.9025448560714722, "rewards_train/2-w": 1.6462823152542114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.774026334285736, "rewards_train/margins_1": 2.550628960132599, "rewards_train/margins_2": 0.25626254081726074, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -200.6673583984375, "logps_train/policy_1_l": -152.46299743652344, "logps_train/policy_1_w": -116.48343658447266, "logps_train/policy_2_2": -159.10919189453125, "logps_train/policy_2_w": -144.95611572265625, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8805292844772339, "rewards_train/1-l": -1.430284023284912, "rewards_train/1-w": 1.8047816753387451, "rewards_train/2-2": 3.2496285438537598, "rewards_train/2-w": 0.9325145483016968, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2350656986236572, "rewards_train/margins_1": -0.07574760913848877, "rewards_train/margins_2": 2.317113995552063, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -297.69049072265625, "logps_train/policy_1_l": -188.9736785888672, "logps_train/policy_1_w": -143.9733123779297, "logps_train/policy_2_2": -254.31826782226562, "logps_train/policy_2_w": -175.7684783935547, "logps_train/ref_1_2": -324.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -292.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 2.5559496879577637, "rewards_train/1-l": -1.604692816734314, "rewards_train/1-w": 2.6917312145233154, "rewards_train/2-2": 3.779110908508301, "rewards_train/2-w": 1.5075280666351318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.296424031257629, "rewards_train/margins_1": 0.13578152656555176, "rewards_train/margins_2": 2.271582841873169, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -180.04977416992188, "logps_train/policy_1_l": -200.94317626953125, "logps_train/policy_1_w": -137.271240234375, "logps_train/policy_2_2": -139.58273315429688, "logps_train/policy_2_w": -175.13052368164062, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.1043968200683594, "rewards_train/1-l": -1.696662425994873, "rewards_train/1-w": 3.3941640853881836, "rewards_train/2-2": 2.122976779937744, "rewards_train/2-w": 2.0760104656219482, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.090826511383057, "rewards_train/margins_1": 2.289767265319824, "rewards_train/margins_2": 0.0469663143157959, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -172.7862548828125, "logps_train/policy_1_l": -145.67568969726562, "logps_train/policy_1_w": -139.443603515625, "logps_train/policy_2_2": -143.7354736328125, "logps_train/policy_2_w": -171.8299560546875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.6604387760162354, "rewards_train/1-l": -0.9269455671310425, "rewards_train/1-w": 2.616577625274658, "rewards_train/2-2": 2.4170784950256348, "rewards_train/2-w": 1.5670051574707031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5435231924057007, "rewards_train/margins_1": 0.9561388492584229, "rewards_train/margins_2": 0.8500733375549316, "step": 293 }, { "epoch": 0.88, "learning_rate": 3.2064854359984976e-06, "loss": 0.666, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -215.28375244140625, "logps_train/policy_1_l": -173.99249267578125, "logps_train/policy_1_w": -103.14178466796875, "logps_train/policy_2_2": -166.66783142089844, "logps_train/policy_2_w": -128.62255859375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.8716257214546204, "rewards_train/1-l": -1.7679986953735352, "rewards_train/1-w": 1.967657208442688, "rewards_train/2-2": 2.733217477798462, "rewards_train/2-w": 1.3018068075180054, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.735655903816223, "rewards_train/margins_1": 1.0960314869880676, "rewards_train/margins_2": 1.4314106702804565, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -201.84950256347656, "logps_train/policy_1_l": -173.21942138671875, "logps_train/policy_1_w": -130.79432678222656, "logps_train/policy_2_2": -159.20806884765625, "logps_train/policy_2_w": -166.69046020507812, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.549424171447754, "rewards_train/1-l": -2.8520214557647705, "rewards_train/1-w": 3.451817274093628, "rewards_train/2-2": 3.466693639755249, "rewards_train/2-w": 2.3098597526550293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.303838729858398, "rewards_train/margins_1": 1.902393102645874, "rewards_train/margins_2": 1.1568338871002197, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -150.56124877929688, "logps_train/policy_1_l": -188.60997009277344, "logps_train/policy_1_w": -134.11767578125, "logps_train/policy_2_2": -128.01934814453125, "logps_train/policy_2_w": -167.0252685546875, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.9861109256744385, "rewards_train/1-l": -0.7127553224563599, "rewards_train/1-w": 1.735156774520874, "rewards_train/2-2": 2.6373233795166016, "rewards_train/2-w": 0.8367319107055664, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.447912096977234, "rewards_train/margins_1": -0.25095415115356445, "rewards_train/margins_2": 1.8005914688110352, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -130.98162841796875, "logps_train/policy_1_l": -92.31414794921875, "logps_train/policy_1_w": -99.32987976074219, "logps_train/policy_2_2": -94.43087768554688, "logps_train/policy_2_w": -133.22610473632812, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.099494457244873, "rewards_train/1-l": -0.790984570980072, "rewards_train/1-w": 3.0779500007629395, "rewards_train/2-2": 2.3069121837615967, "rewards_train/2-w": 1.8023903369903564, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8689345717430115, "rewards_train/margins_1": 1.9784555435180664, "rewards_train/margins_2": 0.5045218467712402, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -147.44383239746094, "logps_train/policy_1_l": -118.39112854003906, "logps_train/policy_1_w": -108.54949951171875, "logps_train/policy_2_2": -111.54277038574219, "logps_train/policy_2_w": -145.65139770507812, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.368117094039917, "rewards_train/1-l": -1.177199125289917, "rewards_train/1-w": 2.368487596511841, "rewards_train/2-2": 2.4980671405792236, "rewards_train/2-w": 0.93486088514328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.545686721801758, "rewards_train/margins_1": 1.0003705024719238, "rewards_train/margins_2": 1.5632062554359436, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -274.3028869628906, "logps_train/policy_1_l": -222.6665802001953, "logps_train/policy_1_w": -121.01901245117188, "logps_train/policy_2_2": -208.38375854492188, "logps_train/policy_2_w": -173.43182373046875, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.7322116494178772, "rewards_train/1-l": -2.6315019130706787, "rewards_train/1-w": 2.283254861831665, "rewards_train/2-2": 2.974123954772949, "rewards_train/2-w": 1.2193174362182617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.914756774902344, "rewards_train/margins_1": 1.5510432124137878, "rewards_train/margins_2": 1.7548065185546875, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -177.8140106201172, "logps_train/policy_1_l": -189.7628173828125, "logps_train/policy_1_w": -112.84974670410156, "logps_train/policy_2_2": -138.47314453125, "logps_train/policy_2_w": -146.001708984375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.934224009513855, "rewards_train/1-l": -1.9430773258209229, "rewards_train/1-w": 2.5208845138549805, "rewards_train/2-2": 3.0597176551818848, "rewards_train/2-w": 1.3416261672973633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.463961839675903, "rewards_train/margins_1": 0.5866605043411255, "rewards_train/margins_2": 1.7180914878845215, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -151.76495361328125, "logps_train/policy_1_l": -107.77288818359375, "logps_train/policy_1_w": -102.24577331542969, "logps_train/policy_2_2": -118.38473510742188, "logps_train/policy_2_w": -121.19358825683594, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.079755187034607, "rewards_train/1-l": -1.2932064533233643, "rewards_train/1-w": 1.7754220962524414, "rewards_train/2-2": 2.4396514892578125, "rewards_train/2-w": 1.0931410789489746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0686285495758057, "rewards_train/margins_1": 0.6956669092178345, "rewards_train/margins_2": 1.346510410308838, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -121.28817749023438, "logps_train/policy_1_l": -151.08724975585938, "logps_train/policy_1_w": -135.75184631347656, "logps_train/policy_2_2": -93.29600524902344, "logps_train/policy_2_w": -166.32611083984375, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.6586824059486389, "rewards_train/1-l": -1.4837262630462646, "rewards_train/1-w": 2.7029409408569336, "rewards_train/2-2": 1.5703990459442139, "rewards_train/2-w": 1.7095773220062256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.186667203903198, "rewards_train/margins_1": 2.0442585349082947, "rewards_train/margins_2": -0.13917827606201172, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -141.73641967773438, "logps_train/policy_1_l": -221.28765869140625, "logps_train/policy_1_w": -138.75692749023438, "logps_train/policy_2_2": -110.01561737060547, "logps_train/policy_2_w": -175.42974853515625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.4201090335845947, "rewards_train/1-l": -3.0898985862731934, "rewards_train/1-w": 3.424307107925415, "rewards_train/2-2": 2.1734390258789062, "rewards_train/2-w": 1.7320256233215332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.514205694198608, "rewards_train/margins_1": 2.0041980743408203, "rewards_train/margins_2": 0.44141340255737305, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -243.36227416992188, "logps_train/policy_1_l": -266.936279296875, "logps_train/policy_1_w": -185.4562530517578, "logps_train/policy_2_2": -182.26974487304688, "logps_train/policy_2_w": -247.05194091796875, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 2.057521343231201, "rewards_train/1-l": -2.767845630645752, "rewards_train/1-w": 3.135624647140503, "rewards_train/2-2": 4.041774749755859, "rewards_train/2-w": 1.0760552883148193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.903470277786255, "rewards_train/margins_1": 1.0781033039093018, "rewards_train/margins_2": 2.96571946144104, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -117.11601257324219, "logps_train/policy_1_l": -131.63255310058594, "logps_train/policy_1_w": -72.67008972167969, "logps_train/policy_2_2": -97.87373352050781, "logps_train/policy_2_w": -90.19773864746094, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.9075393676757812, "rewards_train/1-l": -1.062474250793457, "rewards_train/1-w": 1.59744393825531, "rewards_train/2-2": 1.4899702072143555, "rewards_train/2-w": 1.031788945198059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.659918189048767, "rewards_train/margins_1": 0.6899045705795288, "rewards_train/margins_2": 0.4581812620162964, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -244.93634033203125, "logps_train/policy_1_l": -282.4110107421875, "logps_train/policy_1_w": -178.67388916015625, "logps_train/policy_2_2": -200.5124969482422, "logps_train/policy_2_w": -218.09764099121094, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -256.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.0657411813735962, "rewards_train/1-l": -2.516101121902466, "rewards_train/1-w": 2.5388612747192383, "rewards_train/2-2": 2.423750400543213, "rewards_train/2-w": 1.5777359008789062, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.054962396621704, "rewards_train/margins_1": 1.473120093345642, "rewards_train/margins_2": 0.8460144996643066, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -97.44868469238281, "logps_train/policy_1_l": -74.86369323730469, "logps_train/policy_1_w": -81.45012664794922, "logps_train/policy_2_2": -80.53739166259766, "logps_train/policy_2_w": -100.77212524414062, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.4617722034454346, "rewards_train/1-l": -0.4816814661026001, "rewards_train/1-w": 2.0253000259399414, "rewards_train/2-2": 1.902510643005371, "rewards_train/2-w": 1.3040374517440796, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5069814920425415, "rewards_train/margins_1": 0.5635278224945068, "rewards_train/margins_2": 0.5984731912612915, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -150.6365966796875, "logps_train/policy_1_l": -170.90353393554688, "logps_train/policy_1_w": -103.56769561767578, "logps_train/policy_2_2": -121.93795776367188, "logps_train/policy_2_w": -131.1894989013672, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.3326284885406494, "rewards_train/1-l": -2.1114463806152344, "rewards_train/1-w": 2.302410125732422, "rewards_train/2-2": 2.1866722106933594, "rewards_train/2-w": 1.3736283779144287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.413856506347656, "rewards_train/margins_1": 0.9697816371917725, "rewards_train/margins_2": 0.8130438327789307, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -170.41879272460938, "logps_train/policy_1_l": -201.73880004882812, "logps_train/policy_1_w": -174.6005096435547, "logps_train/policy_2_2": -132.13204956054688, "logps_train/policy_2_w": -226.57444763183594, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 0.7503085136413574, "rewards_train/1-l": -0.5801306962966919, "rewards_train/1-w": 2.941512107849121, "rewards_train/2-2": 1.9004676342010498, "rewards_train/2-w": 1.1206796169281006, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.521642804145813, "rewards_train/margins_1": 2.1912035942077637, "rewards_train/margins_2": 0.7797880172729492, "step": 295 }, { "epoch": 0.89, "learning_rate": 3.182759993671458e-06, "loss": 0.6567, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -174.52972412109375, "logps_train/policy_1_l": -180.3028564453125, "logps_train/policy_1_w": -132.49819946289062, "logps_train/policy_2_2": -131.73611450195312, "logps_train/policy_2_w": -159.16558837890625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.6751530170440674, "rewards_train/1-l": -1.600597620010376, "rewards_train/1-w": 2.3158059120178223, "rewards_train/2-2": 2.7560760974884033, "rewards_train/2-w": 1.267815351486206, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9164035320281982, "rewards_train/margins_1": 0.6406528949737549, "rewards_train/margins_2": 1.4882607460021973, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -172.77439880371094, "logps_train/policy_1_l": -139.095703125, "logps_train/policy_1_w": -141.93658447265625, "logps_train/policy_2_2": -135.69036865234375, "logps_train/policy_2_w": -179.3470916748047, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.2428724765777588, "rewards_train/1-l": -0.8522467613220215, "rewards_train/1-w": 2.5063419342041016, "rewards_train/2-2": 2.7340874671936035, "rewards_train/2-w": 1.2465417385101318, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.358588695526123, "rewards_train/margins_1": 1.2634694576263428, "rewards_train/margins_2": 1.4875457286834717, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -97.488037109375, "logps_train/policy_1_l": -111.64080810546875, "logps_train/policy_1_w": -101.17967224121094, "logps_train/policy_2_2": -75.61839294433594, "logps_train/policy_2_w": -123.76546478271484, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.6644776463508606, "rewards_train/1-l": -1.1391792297363281, "rewards_train/1-w": 1.550050973892212, "rewards_train/2-2": 1.4393327236175537, "rewards_train/2-w": 0.4298505187034607, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.68923020362854, "rewards_train/margins_1": 0.8855733275413513, "rewards_train/margins_2": 1.009482204914093, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -116.16868591308594, "logps_train/policy_1_l": -84.49784851074219, "logps_train/policy_1_w": -74.05302429199219, "logps_train/policy_2_2": -85.10728454589844, "logps_train/policy_2_w": -89.87112426757812, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": 0.9550073146820068, "rewards_train/1-l": -1.1620895862579346, "rewards_train/1-w": 1.8526082038879395, "rewards_train/2-2": 1.9267710447311401, "rewards_train/2-w": 1.2550749778747559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.014697790145874, "rewards_train/margins_1": 0.8976008892059326, "rewards_train/margins_2": 0.6716960668563843, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -78.46206665039062, "logps_train/policy_1_l": -107.42888641357422, "logps_train/policy_1_w": -43.393951416015625, "logps_train/policy_2_2": -56.581512451171875, "logps_train/policy_2_w": -62.351341247558594, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -55.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -69.0, "rewards_train/1-2": 0.45271915197372437, "rewards_train/1-l": -0.848650336265564, "rewards_train/1-w": 1.1509369611740112, "rewards_train/2-2": 1.0929226875305176, "rewards_train/2-w": 0.6655495762825012, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9995872974395752, "rewards_train/margins_1": 0.6982178092002869, "rewards_train/margins_2": 0.42737311124801636, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -170.65750122070312, "logps_train/policy_1_l": -171.28138732910156, "logps_train/policy_1_w": -158.540283203125, "logps_train/policy_2_2": -119.7317886352539, "logps_train/policy_2_w": -213.11419677734375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 0.530735433101654, "rewards_train/1-l": -1.3197405338287354, "rewards_train/1-w": 1.6401118040084839, "rewards_train/2-2": 2.073305606842041, "rewards_train/2-w": 0.4045964181423187, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9598523378372192, "rewards_train/margins_1": 1.1093763709068298, "rewards_train/margins_2": 1.6687091886997223, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -136.5383758544922, "logps_train/policy_1_l": -142.69390869140625, "logps_train/policy_1_w": -116.40911865234375, "logps_train/policy_2_2": -109.7053451538086, "logps_train/policy_2_w": -139.21743774414062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 0.6180376410484314, "rewards_train/1-l": -1.103374719619751, "rewards_train/1-w": 1.2684627771377563, "rewards_train/2-2": 1.3872779607772827, "rewards_train/2-w": 0.7723965644836426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3718374967575073, "rewards_train/margins_1": 0.650425136089325, "rewards_train/margins_2": 0.6148813962936401, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -238.0285186767578, "logps_train/policy_1_l": -189.129638671875, "logps_train/policy_1_w": -148.880859375, "logps_train/policy_2_2": -196.20193481445312, "logps_train/policy_2_w": -177.03042602539062, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.8971481919288635, "rewards_train/1-l": -1.292259931564331, "rewards_train/1-w": 2.0775394439697266, "rewards_train/2-2": 2.2954320907592773, "rewards_train/2-w": 1.4407085180282593, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3697993755340576, "rewards_train/margins_1": 1.180391252040863, "rewards_train/margins_2": 0.8547235727310181, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -173.3822479248047, "logps_train/policy_1_l": -154.58810424804688, "logps_train/policy_1_w": -121.06361389160156, "logps_train/policy_2_2": -144.14857482910156, "logps_train/policy_2_w": -155.07150268554688, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.5398999452590942, "rewards_train/1-l": -1.4666228294372559, "rewards_train/1-w": 2.2819204330444336, "rewards_train/2-2": 2.5609242916107178, "rewards_train/2-w": 1.4037885665893555, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7485432624816895, "rewards_train/margins_1": 0.7420204877853394, "rewards_train/margins_2": 1.1571357250213623, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -193.12860107421875, "logps_train/policy_1_l": -165.936279296875, "logps_train/policy_1_w": -166.92306518554688, "logps_train/policy_2_2": -160.76220703125, "logps_train/policy_2_w": -211.14083862304688, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.176593542098999, "rewards_train/1-l": -0.658081591129303, "rewards_train/1-w": 3.6170690059661865, "rewards_train/2-2": 3.321436882019043, "rewards_train/2-w": 2.257791519165039, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.2751505970954895, "rewards_train/margins_1": 1.4404754638671875, "rewards_train/margins_2": 1.063645362854004, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -100.79288482666016, "logps_train/policy_1_l": -99.31785583496094, "logps_train/policy_1_w": -56.10626983642578, "logps_train/policy_2_2": -85.01768493652344, "logps_train/policy_2_w": -70.99093627929688, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 1.0747153759002686, "rewards_train/1-l": -1.2325667142868042, "rewards_train/1-w": 1.5799980163574219, "rewards_train/2-2": 1.7659077644348145, "rewards_train/2-w": 1.2618440389633179, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.812564730644226, "rewards_train/margins_1": 0.5052826404571533, "rewards_train/margins_2": 0.5040637254714966, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -176.62213134765625, "logps_train/policy_1_l": -124.57102966308594, "logps_train/policy_1_w": -123.53856658935547, "logps_train/policy_2_2": -145.74087524414062, "logps_train/policy_2_w": -151.41342163085938, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.7987236976623535, "rewards_train/1-l": -0.9758529663085938, "rewards_train/1-w": 2.557861804962158, "rewards_train/2-2": 2.895052194595337, "rewards_train/2-w": 1.4766267538070679, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.533714771270752, "rewards_train/margins_1": 0.7591381072998047, "rewards_train/margins_2": 1.418425440788269, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -190.5076904296875, "logps_train/policy_1_l": -180.70919799804688, "logps_train/policy_1_w": -157.755859375, "logps_train/policy_2_2": -148.1959228515625, "logps_train/policy_2_w": -205.4947509765625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.0695432424545288, "rewards_train/1-l": -1.4162318706512451, "rewards_train/1-w": 2.0916006565093994, "rewards_train/2-2": 2.4491567611694336, "rewards_train/2-w": 0.5786508917808533, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5078325271606445, "rewards_train/margins_1": 1.0220574140548706, "rewards_train/margins_2": 1.8705058693885803, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -200.40399169921875, "logps_train/policy_1_l": -286.8992919921875, "logps_train/policy_1_w": -235.88076782226562, "logps_train/policy_2_2": -154.65908813476562, "logps_train/policy_2_w": -300.50048828125, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -316.0, "rewards_train/1-2": 1.615849256515503, "rewards_train/1-l": -2.3868021965026855, "rewards_train/1-w": 3.249422073364258, "rewards_train/2-2": 2.8700294494628906, "rewards_train/2-w": 1.5936990976333618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.636224269866943, "rewards_train/margins_1": 1.6335728168487549, "rewards_train/margins_2": 1.2763303518295288, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -69.48554992675781, "logps_train/policy_1_l": -133.8507080078125, "logps_train/policy_1_w": -72.02217102050781, "logps_train/policy_2_2": -57.45567321777344, "logps_train/policy_2_w": -90.59151458740234, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.6592576503753662, "rewards_train/1-l": -1.110461950302124, "rewards_train/1-w": 1.0243452787399292, "rewards_train/2-2": 1.048964262008667, "rewards_train/2-w": 0.4064733386039734, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1348072290420532, "rewards_train/margins_1": 0.365087628364563, "rewards_train/margins_2": 0.6424909234046936, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -114.51637268066406, "logps_train/policy_1_l": -136.88978576660156, "logps_train/policy_1_w": -93.3855972290039, "logps_train/policy_2_2": -97.88375854492188, "logps_train/policy_2_w": -128.1903533935547, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.442112922668457, "rewards_train/1-l": -1.8322889804840088, "rewards_train/1-w": 2.1426901817321777, "rewards_train/2-2": 1.9397492408752441, "rewards_train/2-w": 0.7762776613235474, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9749791622161865, "rewards_train/margins_1": 0.7005772590637207, "rewards_train/margins_2": 1.1634715795516968, "step": 297 }, { "epoch": 0.89, "learning_rate": 3.1589679151726693e-06, "loss": 0.8144, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -217.2515106201172, "logps_train/policy_1_l": -196.6902313232422, "logps_train/policy_1_w": -212.57090759277344, "logps_train/policy_2_2": -170.12835693359375, "logps_train/policy_2_w": -281.4344482421875, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -296.0, "rewards_train/1-2": 1.5209426879882812, "rewards_train/1-l": -1.636600375175476, "rewards_train/1-w": 3.5163466930389404, "rewards_train/2-2": 2.7207589149475098, "rewards_train/2-w": 1.3721814155578613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1529470682144165, "rewards_train/margins_1": 1.9954040050506592, "rewards_train/margins_2": 1.3485774993896484, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -109.63677215576172, "logps_train/policy_1_l": -77.2388687133789, "logps_train/policy_1_w": -89.30152893066406, "logps_train/policy_2_2": -83.16273498535156, "logps_train/policy_2_w": -111.96150207519531, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.9941350817680359, "rewards_train/1-l": -0.7742770910263062, "rewards_train/1-w": 1.4233624935150146, "rewards_train/2-2": 1.845445156097412, "rewards_train/2-w": 0.34916266798973083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.197639584541321, "rewards_train/margins_1": 0.42922741174697876, "rewards_train/margins_2": 1.4962824881076813, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -118.95911407470703, "logps_train/policy_1_l": -77.42945861816406, "logps_train/policy_1_w": -84.98654174804688, "logps_train/policy_2_2": -96.94816589355469, "logps_train/policy_2_w": -105.40388488769531, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 0.6994011998176575, "rewards_train/1-l": -0.9399188756942749, "rewards_train/1-w": 1.2370882034301758, "rewards_train/2-2": 1.4286203384399414, "rewards_train/2-w": 0.4225018620491028, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.1770070791244507, "rewards_train/margins_1": 0.5376870036125183, "rewards_train/margins_2": 1.0061184763908386, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -81.10647583007812, "logps_train/policy_1_l": -50.96786117553711, "logps_train/policy_1_w": -50.78034591674805, "logps_train/policy_2_2": -59.15166473388672, "logps_train/policy_2_w": -78.46672058105469, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -41.25, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -72.5, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 0.4096651077270508, "rewards_train/1-l": -0.95381760597229, "rewards_train/1-w": 1.603996992111206, "rewards_train/2-2": 1.3278024196624756, "rewards_train/2-w": 0.488484263420105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.557814598083496, "rewards_train/margins_1": 1.1943318843841553, "rewards_train/margins_2": 0.8393181562423706, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -116.46603393554688, "logps_train/policy_1_l": -136.68289184570312, "logps_train/policy_1_w": -149.57144165039062, "logps_train/policy_2_2": -91.08663940429688, "logps_train/policy_2_w": -186.62615966796875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.483474612236023, "rewards_train/1-l": -1.4712196588516235, "rewards_train/1-w": 2.735825300216675, "rewards_train/2-2": 1.9720005989074707, "rewards_train/2-w": 0.8795724511146545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.207044959068298, "rewards_train/margins_1": 1.2523506879806519, "rewards_train/margins_2": 1.0924281477928162, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -165.39553833007812, "logps_train/policy_1_l": -265.8838806152344, "logps_train/policy_1_w": -171.0946044921875, "logps_train/policy_2_2": -141.3079833984375, "logps_train/policy_2_w": -209.39266967773438, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.0151331424713135, "rewards_train/1-l": -3.0758886337280273, "rewards_train/1-w": 4.165539264678955, "rewards_train/2-2": 2.60357666015625, "rewards_train/2-w": 2.423232316970825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.241427898406982, "rewards_train/margins_1": 2.1504061222076416, "rewards_train/margins_2": 0.1803443431854248, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -89.99627685546875, "logps_train/policy_1_l": -85.12164306640625, "logps_train/policy_1_w": -93.20501708984375, "logps_train/policy_2_2": -66.30914306640625, "logps_train/policy_2_w": -124.0406265258789, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.9503728151321411, "rewards_train/1-l": -1.2231016159057617, "rewards_train/1-w": 2.3974673748016357, "rewards_train/2-2": 1.5729925632476807, "rewards_train/2-w": 1.1943747997283936, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6205689907073975, "rewards_train/margins_1": 1.4470945596694946, "rewards_train/margins_2": 0.3786177635192871, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -236.5675811767578, "logps_train/policy_1_l": -174.64508056640625, "logps_train/policy_1_w": -142.04275512695312, "logps_train/policy_2_2": -180.34619140625, "logps_train/policy_2_w": -192.95655822753906, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.4744919538497925, "rewards_train/1-l": -2.089020252227783, "rewards_train/1-w": 2.3761940002441406, "rewards_train/2-2": 3.3341312408447266, "rewards_train/2-w": 1.1098132133483887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.465214252471924, "rewards_train/margins_1": 0.9017020463943481, "rewards_train/margins_2": 2.224318027496338, "step": 298 }, { "epoch": 0.9, "logps_train/policy_1_2": -90.18756103515625, "logps_train/policy_1_l": -84.4100341796875, "logps_train/policy_1_w": -83.64244842529297, "logps_train/policy_2_2": -71.1289291381836, "logps_train/policy_2_w": -111.98677825927734, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8328061103820801, "rewards_train/1-l": -1.298718810081482, "rewards_train/1-w": 1.486682653427124, "rewards_train/2-2": 1.4519509077072144, "rewards_train/2-w": 0.4090364873409271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.785401463508606, "rewards_train/margins_1": 0.653876543045044, "rewards_train/margins_2": 1.0429144203662872, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -124.78274536132812, "logps_train/policy_1_l": -115.07861328125, "logps_train/policy_1_w": -91.74309539794922, "logps_train/policy_2_2": -95.4509506225586, "logps_train/policy_2_w": -119.87633514404297, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.1561003923416138, "rewards_train/1-l": -1.9822752475738525, "rewards_train/1-w": 1.7538154125213623, "rewards_train/2-2": 2.2392802238464355, "rewards_train/2-w": 0.6878553032875061, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.736090660095215, "rewards_train/margins_1": 0.5977150201797485, "rewards_train/margins_2": 1.5514249205589294, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -112.91726684570312, "logps_train/policy_1_l": -184.5999298095703, "logps_train/policy_1_w": -116.00313568115234, "logps_train/policy_2_2": -94.27008056640625, "logps_train/policy_2_w": -143.6813201904297, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0813201665878296, "rewards_train/1-l": -2.097102165222168, "rewards_train/1-w": 2.4801554679870605, "rewards_train/2-2": 1.6444770097732544, "rewards_train/2-w": 1.4162427186965942, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.5772576332092285, "rewards_train/margins_1": 1.398835301399231, "rewards_train/margins_2": 0.22823429107666016, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -235.7897186279297, "logps_train/policy_1_l": -278.3460998535156, "logps_train/policy_1_w": -159.92034912109375, "logps_train/policy_2_2": -186.0855712890625, "logps_train/policy_2_w": -196.30787658691406, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.396028757095337, "rewards_train/1-l": -3.4904696941375732, "rewards_train/1-w": 2.7874584197998047, "rewards_train/2-2": 3.572692394256592, "rewards_train/2-w": 1.7254629135131836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.277928113937378, "rewards_train/margins_1": 1.3914296627044678, "rewards_train/margins_2": 1.8472294807434082, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -219.5108642578125, "logps_train/policy_1_l": -218.8890838623047, "logps_train/policy_1_w": -148.12652587890625, "logps_train/policy_2_2": -173.77349853515625, "logps_train/policy_2_w": -201.83175659179688, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.594224452972412, "rewards_train/1-l": -1.2135180234909058, "rewards_train/1-w": 2.0365676879882812, "rewards_train/2-2": 3.324212074279785, "rewards_train/2-w": 0.7996370196342468, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.250085711479187, "rewards_train/margins_1": 0.44234323501586914, "rewards_train/margins_2": 2.5245750546455383, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -174.81039428710938, "logps_train/policy_1_l": -182.18487548828125, "logps_train/policy_1_w": -141.29736328125, "logps_train/policy_2_2": -150.38717651367188, "logps_train/policy_2_w": -175.8265380859375, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5608066320419312, "rewards_train/1-l": -1.3755199909210205, "rewards_train/1-w": 2.7910642623901367, "rewards_train/2-2": 2.488429307937622, "rewards_train/2-w": 1.5745729207992554, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.166584253311157, "rewards_train/margins_1": 1.2302576303482056, "rewards_train/margins_2": 0.9138563871383667, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -218.56106567382812, "logps_train/policy_1_l": -155.9721221923828, "logps_train/policy_1_w": -139.12794494628906, "logps_train/policy_2_2": -173.7603759765625, "logps_train/policy_2_w": -203.78878784179688, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.831392526626587, "rewards_train/1-l": -1.563227891921997, "rewards_train/1-w": 2.345799207687378, "rewards_train/2-2": 3.2817745208740234, "rewards_train/2-w": 0.48947978019714355, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.909027099609375, "rewards_train/margins_1": 0.514406681060791, "rewards_train/margins_2": 2.79229474067688, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -166.32302856445312, "logps_train/policy_1_l": -188.56417846679688, "logps_train/policy_1_w": -145.98489379882812, "logps_train/policy_2_2": -130.55856323242188, "logps_train/policy_2_w": -193.54034423828125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.863008975982666, "rewards_train/1-l": -1.9372782707214355, "rewards_train/1-w": 3.1827616691589355, "rewards_train/2-2": 2.899613618850708, "rewards_train/2-w": 1.8006525039672852, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.120039939880371, "rewards_train/margins_1": 1.3197526931762695, "rewards_train/margins_2": 1.0989611148834229, "step": 299 }, { "epoch": 0.9, "learning_rate": 3.1351115225670483e-06, "loss": 0.7411, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -27.772666931152344, "logps_train/policy_1_l": -52.25096130371094, "logps_train/policy_1_w": -42.385414123535156, "logps_train/policy_2_2": -19.673046112060547, "logps_train/policy_2_w": -60.46350860595703, "logps_train/ref_1_2": -31.25, "logps_train/ref_1_l": -43.0, "logps_train/ref_1_w": -55.25, "logps_train/ref_2_2": -26.0, "logps_train/ref_2_w": -66.5, "rewards_train/1-2": 0.3576456308364868, "rewards_train/1-l": -0.9222155809402466, "rewards_train/1-w": 1.299935221672058, "rewards_train/2-2": 0.6274219751358032, "rewards_train/2-w": 0.6102895736694336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2221508026123047, "rewards_train/margins_1": 0.9422895908355713, "rewards_train/margins_2": 0.01713240146636963, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -109.90704345703125, "logps_train/policy_1_l": -110.21311950683594, "logps_train/policy_1_w": -83.08218383789062, "logps_train/policy_2_2": -84.17977142333984, "logps_train/policy_2_w": -122.20842742919922, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.7762882113456726, "rewards_train/1-l": -1.2452373504638672, "rewards_train/1-w": 1.7617032527923584, "rewards_train/2-2": 1.2931561470031738, "rewards_train/2-w": 0.6518136262893677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0069406032562256, "rewards_train/margins_1": 0.9854150414466858, "rewards_train/margins_2": 0.6413425207138062, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -149.35289001464844, "logps_train/policy_1_l": -204.79962158203125, "logps_train/policy_1_w": -133.83563232421875, "logps_train/policy_2_2": -124.90027618408203, "logps_train/policy_2_w": -164.15988159179688, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.111586570739746, "rewards_train/1-l": -3.1127734184265137, "rewards_train/1-w": 2.435185432434082, "rewards_train/2-2": 2.7224721908569336, "rewards_train/2-w": 1.4293243885040283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.547958850860596, "rewards_train/margins_1": 0.32359886169433594, "rewards_train/margins_2": 1.2931478023529053, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -105.11259460449219, "logps_train/policy_1_l": -124.66899871826172, "logps_train/policy_1_w": -88.30315399169922, "logps_train/policy_2_2": -83.2898178100586, "logps_train/policy_2_w": -114.95797729492188, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.8715529441833496, "rewards_train/1-l": -1.4475642442703247, "rewards_train/1-w": 2.0649971961975098, "rewards_train/2-2": 1.3108623027801514, "rewards_train/2-w": 1.5405306816101074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5125614404678345, "rewards_train/margins_1": 1.1934442520141602, "rewards_train/margins_2": -0.22966837882995605, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -193.06747436523438, "logps_train/policy_1_l": -124.11444091796875, "logps_train/policy_1_w": -126.47032165527344, "logps_train/policy_2_2": -158.40719604492188, "logps_train/policy_2_w": -153.33592224121094, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.7487233877182007, "rewards_train/1-l": -0.8808776140213013, "rewards_train/1-w": 1.683436632156372, "rewards_train/2-2": 1.6358426809310913, "rewards_train/2-w": 0.8679695129394531, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5643142461776733, "rewards_train/margins_1": 0.9347132444381714, "rewards_train/margins_2": 0.7678731679916382, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -139.95584106445312, "logps_train/policy_1_l": -123.64976501464844, "logps_train/policy_1_w": -134.35049438476562, "logps_train/policy_2_2": -113.1959457397461, "logps_train/policy_2_w": -174.11720275878906, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.1380095481872559, "rewards_train/1-l": -0.762730598449707, "rewards_train/1-w": 2.7837014198303223, "rewards_train/2-2": 2.090951919555664, "rewards_train/2-w": 1.1523419618606567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5464320182800293, "rewards_train/margins_1": 1.6456918716430664, "rewards_train/margins_2": 0.9386099576950073, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -203.36288452148438, "logps_train/policy_1_l": -138.08151245117188, "logps_train/policy_1_w": -133.55047607421875, "logps_train/policy_2_2": -170.91603088378906, "logps_train/policy_2_w": -159.9278564453125, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.6980855464935303, "rewards_train/1-l": -0.43588656187057495, "rewards_train/1-w": 2.2230775356292725, "rewards_train/2-2": 2.9474596977233887, "rewards_train/2-w": 1.5822144746780396, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6589640974998474, "rewards_train/margins_1": 0.5249919891357422, "rewards_train/margins_2": 1.3652452230453491, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -201.13023376464844, "logps_train/policy_1_l": -187.2120361328125, "logps_train/policy_1_w": -149.53594970703125, "logps_train/policy_2_2": -163.11758422851562, "logps_train/policy_2_w": -195.6166534423828, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.7760391235351562, "rewards_train/1-l": -1.5289924144744873, "rewards_train/1-w": 2.633904457092285, "rewards_train/2-2": 3.3116793632507324, "rewards_train/2-w": 1.2289588451385498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1628968715667725, "rewards_train/margins_1": 0.8578653335571289, "rewards_train/margins_2": 2.0827205181121826, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -121.418701171875, "logps_train/policy_1_l": -117.7625732421875, "logps_train/policy_1_w": -83.90418243408203, "logps_train/policy_2_2": -94.43473052978516, "logps_train/policy_2_w": -119.16446685791016, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.6846927404403687, "rewards_train/1-l": -1.2330939769744873, "rewards_train/1-w": 2.1173946857452393, "rewards_train/2-2": 1.779964566230774, "rewards_train/2-w": 1.0210528373718262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3504886627197266, "rewards_train/margins_1": 1.4327019453048706, "rewards_train/margins_2": 0.7589117288589478, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -86.53138732910156, "logps_train/policy_1_l": -103.77274322509766, "logps_train/policy_1_w": -62.135643005371094, "logps_train/policy_2_2": -70.1571273803711, "logps_train/policy_2_w": -82.74409484863281, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.851939857006073, "rewards_train/1-l": -0.7930947542190552, "rewards_train/1-w": 1.479794979095459, "rewards_train/2-2": 1.304599404335022, "rewards_train/2-w": 0.9326213002204895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.272889733314514, "rewards_train/margins_1": 0.627855122089386, "rewards_train/margins_2": 0.37197810411453247, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -140.4798583984375, "logps_train/policy_1_l": -150.6002197265625, "logps_train/policy_1_w": -106.14838409423828, "logps_train/policy_2_2": -101.8388671875, "logps_train/policy_2_w": -147.24664306640625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.5207629203796387, "rewards_train/1-l": -1.588487982749939, "rewards_train/1-w": 2.9230523109436035, "rewards_train/2-2": 3.034862756729126, "rewards_train/2-w": 1.7991644144058228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5115402936935425, "rewards_train/margins_1": 1.4022893905639648, "rewards_train/margins_2": 1.2356983423233032, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -182.50567626953125, "logps_train/policy_1_l": -261.3518371582031, "logps_train/policy_1_w": -151.4080047607422, "logps_train/policy_2_2": -151.86270141601562, "logps_train/policy_2_w": -187.9212188720703, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.31193208694458, "rewards_train/1-l": -2.4109644889831543, "rewards_train/1-w": 3.1420116424560547, "rewards_train/2-2": 2.8793556690216064, "rewards_train/2-w": 2.2360026836395264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.552976131439209, "rewards_train/margins_1": 0.8300795555114746, "rewards_train/margins_2": 0.6433529853820801, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -128.41592407226562, "logps_train/policy_1_l": -161.642822265625, "logps_train/policy_1_w": -79.70580291748047, "logps_train/policy_2_2": -106.72098541259766, "logps_train/policy_2_w": -106.47219848632812, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.7318450212478638, "rewards_train/1-l": -2.4752931594848633, "rewards_train/1-w": 2.065356731414795, "rewards_train/2-2": 2.366182804107666, "rewards_train/2-w": 1.1027805805206299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.540649890899658, "rewards_train/margins_1": 0.33351171016693115, "rewards_train/margins_2": 1.2634022235870361, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -260.33746337890625, "logps_train/policy_1_l": -273.658447265625, "logps_train/policy_1_w": -200.95663452148438, "logps_train/policy_2_2": -226.05160522460938, "logps_train/policy_2_w": -246.8180389404297, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -253.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.150629758834839, "rewards_train/1-l": -2.0931873321533203, "rewards_train/1-w": 3.344961166381836, "rewards_train/2-2": 3.5448386669158936, "rewards_train/2-w": 2.126007556915283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.438148498535156, "rewards_train/margins_1": 1.194331407546997, "rewards_train/margins_2": 1.4188311100006104, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -114.99813079833984, "logps_train/policy_1_l": -134.1350555419922, "logps_train/policy_1_w": -87.25628662109375, "logps_train/policy_2_2": -90.40377807617188, "logps_train/policy_2_w": -116.17020416259766, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.3908119201660156, "rewards_train/1-l": -1.3744423389434814, "rewards_train/1-w": 1.7853084802627563, "rewards_train/2-2": 2.125638484954834, "rewards_train/2-w": 1.0173547267913818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.159750819206238, "rewards_train/margins_1": 0.3944965600967407, "rewards_train/margins_2": 1.1082837581634521, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -163.7510223388672, "logps_train/policy_1_l": -255.69888305664062, "logps_train/policy_1_w": -159.227294921875, "logps_train/policy_2_2": -136.1005859375, "logps_train/policy_2_w": -189.40414428710938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.4596633911132812, "rewards_train/1-l": -2.4620749950408936, "rewards_train/1-w": 2.42258358001709, "rewards_train/2-2": 2.306739330291748, "rewards_train/2-w": 1.273648738861084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.884658575057983, "rewards_train/margins_1": 0.9629201889038086, "rewards_train/margins_2": 1.033090591430664, "step": 301 }, { "epoch": 0.9, "learning_rate": 3.111193144196457e-06, "loss": 0.6561, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -88.616455078125, "logps_train/policy_1_l": -94.93475341796875, "logps_train/policy_1_w": -104.79257202148438, "logps_train/policy_2_2": -70.91283416748047, "logps_train/policy_2_w": -127.59596252441406, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.2196044921875, "rewards_train/1-l": -1.0561277866363525, "rewards_train/1-w": 1.6060938835144043, "rewards_train/2-2": 1.613183856010437, "rewards_train/2-w": 0.6638411283493042, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.662221670150757, "rewards_train/margins_1": 0.3864893913269043, "rewards_train/margins_2": 0.9493427276611328, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -241.8671112060547, "logps_train/policy_1_l": -237.48779296875, "logps_train/policy_1_w": -120.82646179199219, "logps_train/policy_2_2": -197.5728759765625, "logps_train/policy_2_w": -151.59307861328125, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.6461009979248047, "rewards_train/1-l": -3.4175291061401367, "rewards_train/1-w": 2.3181354999542236, "rewards_train/2-2": 3.7286489009857178, "rewards_train/2-w": 1.5332703590393066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.73566460609436, "rewards_train/margins_1": 0.672034502029419, "rewards_train/margins_2": 2.195378541946411, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -140.32174682617188, "logps_train/policy_1_l": -74.68831634521484, "logps_train/policy_1_w": -53.913108825683594, "logps_train/policy_2_2": -112.89641571044922, "logps_train/policy_2_w": -76.52754974365234, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.08813748508691788, "rewards_train/1-l": -1.1108238697052002, "rewards_train/1-w": 1.2813458442687988, "rewards_train/2-2": 1.3072339296340942, "rewards_train/2-w": 0.3603307008743286, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.392169713973999, "rewards_train/margins_1": 1.193208359181881, "rewards_train/margins_2": 0.9469032287597656, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -150.76287841796875, "logps_train/policy_1_l": -132.29611206054688, "logps_train/policy_1_w": -101.78485107421875, "logps_train/policy_2_2": -107.81672668457031, "logps_train/policy_2_w": -138.1142578125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3033995628356934, "rewards_train/1-l": -1.640207052230835, "rewards_train/1-w": 2.169952392578125, "rewards_train/2-2": 2.5527021884918213, "rewards_train/2-w": 1.2198238372802734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.81015944480896, "rewards_train/margins_1": 0.8665528297424316, "rewards_train/margins_2": 1.3328783512115479, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -220.3784637451172, "logps_train/policy_1_l": -232.94256591796875, "logps_train/policy_1_w": -199.77197265625, "logps_train/policy_2_2": -190.23825073242188, "logps_train/policy_2_w": -248.65623474121094, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -237.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.9965288639068604, "rewards_train/1-l": -2.7911314964294434, "rewards_train/1-w": 3.722801923751831, "rewards_train/2-2": 3.2980501651763916, "rewards_train/2-w": 2.009376049041748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.513933420181274, "rewards_train/margins_1": 1.7262730598449707, "rewards_train/margins_2": 1.2886741161346436, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -149.51927185058594, "logps_train/policy_1_l": -157.55636596679688, "logps_train/policy_1_w": -110.39300537109375, "logps_train/policy_2_2": -110.29788208007812, "logps_train/policy_2_w": -146.24212646484375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.1449475288391113, "rewards_train/1-l": -1.4548561573028564, "rewards_train/1-w": 2.2310123443603516, "rewards_train/2-2": 2.393649101257324, "rewards_train/2-w": 1.385162115097046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.685868501663208, "rewards_train/margins_1": 1.0860648155212402, "rewards_train/margins_2": 1.0084869861602783, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -204.7255859375, "logps_train/policy_1_l": -285.3658142089844, "logps_train/policy_1_w": -178.0482177734375, "logps_train/policy_2_2": -163.89645385742188, "logps_train/policy_2_w": -219.54129028320312, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.141894817352295, "rewards_train/1-l": -3.5178310871124268, "rewards_train/1-w": 2.992053508758545, "rewards_train/2-2": 2.332231044769287, "rewards_train/2-w": 1.4458694458007812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.509884595870972, "rewards_train/margins_1": 1.85015869140625, "rewards_train/margins_2": 0.8863615989685059, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -124.31420135498047, "logps_train/policy_1_l": -131.23248291015625, "logps_train/policy_1_w": -81.03858947753906, "logps_train/policy_2_2": -105.1857681274414, "logps_train/policy_2_w": -109.34149932861328, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.3224866390228271, "rewards_train/1-l": -0.8384829163551331, "rewards_train/1-w": 2.0562973022460938, "rewards_train/2-2": 2.129470109939575, "rewards_train/2-w": 1.5689752101898193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.894780218601227, "rewards_train/margins_1": 0.7338106632232666, "rewards_train/margins_2": 0.5604948997497559, "step": 302 }, { "epoch": 0.91, "logps_train/policy_1_2": -154.2369384765625, "logps_train/policy_1_l": -168.39813232421875, "logps_train/policy_1_w": -143.52630615234375, "logps_train/policy_2_2": -123.42848205566406, "logps_train/policy_2_w": -183.772216796875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.2497421503067017, "rewards_train/1-l": -1.3482117652893066, "rewards_train/1-w": 3.174713134765625, "rewards_train/2-2": 2.2743396759033203, "rewards_train/2-w": 1.5743408203125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.522924900054932, "rewards_train/margins_1": 1.9249709844589233, "rewards_train/margins_2": 0.6999988555908203, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -150.96200561523438, "logps_train/policy_1_l": -145.32325744628906, "logps_train/policy_1_w": -136.96331787109375, "logps_train/policy_2_2": -119.1548843383789, "logps_train/policy_2_w": -172.99032592773438, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.6334877014160156, "rewards_train/1-l": -1.1565450429916382, "rewards_train/1-w": 2.074761152267456, "rewards_train/2-2": 2.695449113845825, "rewards_train/2-w": 0.6548736095428467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2313061952590942, "rewards_train/margins_1": 0.44127345085144043, "rewards_train/margins_2": 2.0405755043029785, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -111.34355163574219, "logps_train/policy_1_l": -191.64443969726562, "logps_train/policy_1_w": -165.7617950439453, "logps_train/policy_2_2": -90.66094970703125, "logps_train/policy_2_w": -217.99749755859375, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": 1.0727729797363281, "rewards_train/1-l": -1.9425702095031738, "rewards_train/1-w": 2.569913387298584, "rewards_train/2-2": 1.584491491317749, "rewards_train/2-w": 0.9002492427825928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.512483596801758, "rewards_train/margins_1": 1.4971404075622559, "rewards_train/margins_2": 0.6842422485351562, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -149.14974975585938, "logps_train/policy_1_l": -164.1572265625, "logps_train/policy_1_w": -111.1135482788086, "logps_train/policy_2_2": -119.23719787597656, "logps_train/policy_2_w": -133.93289184570312, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.336294174194336, "rewards_train/1-l": -2.2613277435302734, "rewards_train/1-w": 2.3972389698028564, "rewards_train/2-2": 2.548935890197754, "rewards_train/2-w": 1.5348352193832397, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.65856671333313, "rewards_train/margins_1": 1.0609447956085205, "rewards_train/margins_2": 1.0141006708145142, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -177.7150421142578, "logps_train/policy_1_l": -144.3023681640625, "logps_train/policy_1_w": -96.78085327148438, "logps_train/policy_2_2": -144.60372924804688, "logps_train/policy_2_w": -106.6708984375, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.0398240089416504, "rewards_train/1-l": -1.402503252029419, "rewards_train/1-w": 1.5976959466934204, "rewards_train/2-2": 2.4630401134490967, "rewards_train/2-w": 1.4141358137130737, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0001991987228394, "rewards_train/margins_1": 0.55787193775177, "rewards_train/margins_2": 1.048904299736023, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -137.26409912109375, "logps_train/policy_1_l": -219.79641723632812, "logps_train/policy_1_w": -192.8109130859375, "logps_train/policy_2_2": -113.58355712890625, "logps_train/policy_2_w": -232.52288818359375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.726715326309204, "rewards_train/1-l": -1.8237820863723755, "rewards_train/1-w": 3.28140926361084, "rewards_train/2-2": 2.1869561672210693, "rewards_train/2-w": 1.635211706161499, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.105191349983215, "rewards_train/margins_1": 1.5546939373016357, "rewards_train/margins_2": 0.5517444610595703, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -125.96974182128906, "logps_train/policy_1_l": -175.727294921875, "logps_train/policy_1_w": -69.62921905517578, "logps_train/policy_2_2": -109.97064971923828, "logps_train/policy_2_w": -87.87238311767578, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -96.0, "rewards_train/1-2": 1.4780254364013672, "rewards_train/1-l": -1.7821052074432373, "rewards_train/1-w": 1.3319998979568481, "rewards_train/2-2": 1.8119194507598877, "rewards_train/2-w": 0.8473316431045532, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1141051054000854, "rewards_train/margins_1": -0.14602553844451904, "rewards_train/margins_2": 0.9645878076553345, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -145.96983337402344, "logps_train/policy_1_l": -130.62103271484375, "logps_train/policy_1_w": -106.32180786132812, "logps_train/policy_2_2": -99.30989074707031, "logps_train/policy_2_w": -146.93031311035156, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.1748913526535034, "rewards_train/1-l": -1.9011180400848389, "rewards_train/1-w": 2.187350273132324, "rewards_train/2-2": 2.733463764190674, "rewards_train/2-w": 0.7514997124671936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.088468313217163, "rewards_train/margins_1": 1.0124589204788208, "rewards_train/margins_2": 1.9819640517234802, "step": 303 }, { "epoch": 0.91, "learning_rate": 3.08721511445246e-06, "loss": 0.6544, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -147.85423278808594, "logps_train/policy_1_l": -216.24473571777344, "logps_train/policy_1_w": -105.50406646728516, "logps_train/policy_2_2": -118.40106201171875, "logps_train/policy_2_w": -125.26097106933594, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.4880143404006958, "rewards_train/1-l": -2.2338476181030273, "rewards_train/1-w": 2.104280948638916, "rewards_train/2-2": 2.2411434650421143, "rewards_train/2-w": 1.6817153692245483, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.338128566741943, "rewards_train/margins_1": 0.6162666082382202, "rewards_train/margins_2": 0.5594280958175659, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -124.69315338134766, "logps_train/policy_1_l": -120.7698974609375, "logps_train/policy_1_w": -84.15046691894531, "logps_train/policy_2_2": -100.1104736328125, "logps_train/policy_2_w": -103.32453918457031, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.5431853532791138, "rewards_train/1-l": -1.1699583530426025, "rewards_train/1-w": 1.639641284942627, "rewards_train/2-2": 2.270984649658203, "rewards_train/2-w": 0.7761397361755371, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8095996379852295, "rewards_train/margins_1": 0.09645593166351318, "rewards_train/margins_2": 1.494844913482666, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -168.22128295898438, "logps_train/policy_1_l": -165.73184204101562, "logps_train/policy_1_w": -110.27643585205078, "logps_train/policy_2_2": -140.74215698242188, "logps_train/policy_2_w": -142.30650329589844, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.8255269527435303, "rewards_train/1-l": -2.0979881286621094, "rewards_train/1-w": 2.1308035850524902, "rewards_train/2-2": 1.8539097309112549, "rewards_train/2-w": 1.1088027954101562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2287917137146, "rewards_train/margins_1": 1.30527663230896, "rewards_train/margins_2": 0.7451069355010986, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -158.38192749023438, "logps_train/policy_1_l": -203.3428955078125, "logps_train/policy_1_w": -110.153564453125, "logps_train/policy_2_2": -123.4836654663086, "logps_train/policy_2_w": -138.5570068359375, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0352442264556885, "rewards_train/1-l": -2.217491388320923, "rewards_train/1-w": 2.5455806255340576, "rewards_train/2-2": 2.1281964778900146, "rewards_train/2-w": 1.6489871740341187, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.7630720138549805, "rewards_train/margins_1": 1.5103363990783691, "rewards_train/margins_2": 0.479209303855896, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -90.5243148803711, "logps_train/policy_1_l": -64.90023803710938, "logps_train/policy_1_w": -53.00042724609375, "logps_train/policy_2_2": -63.98225402832031, "logps_train/policy_2_w": -74.00553894042969, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -51.25, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 0.589756429195404, "rewards_train/1-l": -1.3601410388946533, "rewards_train/1-w": 1.839020013809204, "rewards_train/2-2": 1.2056812047958374, "rewards_train/2-w": 0.978743314743042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1991610527038574, "rewards_train/margins_1": 1.2492635846138, "rewards_train/margins_2": 0.2269378900527954, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -172.08612060546875, "logps_train/policy_1_l": -127.44354248046875, "logps_train/policy_1_w": -110.14571380615234, "logps_train/policy_2_2": -139.09947204589844, "logps_train/policy_2_w": -147.97637939453125, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.2767398357391357, "rewards_train/1-l": -1.196570873260498, "rewards_train/1-w": 2.3007612228393555, "rewards_train/2-2": 2.6134161949157715, "rewards_train/2-w": 0.9750194549560547, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4973320960998535, "rewards_train/margins_1": 1.0240213871002197, "rewards_train/margins_2": 1.6383967399597168, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -122.34528350830078, "logps_train/policy_1_l": -130.8785400390625, "logps_train/policy_1_w": -83.39085388183594, "logps_train/policy_2_2": -92.56126403808594, "logps_train/policy_2_w": -121.1277084350586, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.7107837200164795, "rewards_train/1-l": -1.9085578918457031, "rewards_train/1-w": 2.462477445602417, "rewards_train/2-2": 1.5177017450332642, "rewards_train/2-w": 1.0044164657592773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.37103533744812, "rewards_train/margins_1": 1.7516937255859375, "rewards_train/margins_2": 0.5132852792739868, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -166.76690673828125, "logps_train/policy_1_l": -143.1029052734375, "logps_train/policy_1_w": -146.5059356689453, "logps_train/policy_2_2": -128.61563110351562, "logps_train/policy_2_w": -180.9547576904297, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.6287784576416016, "rewards_train/1-l": -1.0927119255065918, "rewards_train/1-w": 2.175969123840332, "rewards_train/2-2": 3.0261318683624268, "rewards_train/2-w": 0.6576499342918396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.268681049346924, "rewards_train/margins_1": 0.5471906661987305, "rewards_train/margins_2": 2.368481934070587, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -230.14013671875, "logps_train/policy_1_l": -167.29244995117188, "logps_train/policy_1_w": -136.48023986816406, "logps_train/policy_2_2": -195.4628448486328, "logps_train/policy_2_w": -178.5890655517578, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.5297369956970215, "rewards_train/1-l": -1.401120662689209, "rewards_train/1-w": 2.5332260131835938, "rewards_train/2-2": 1.565434217453003, "rewards_train/2-w": 1.2192189693450928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9343466758728027, "rewards_train/margins_1": 2.0034890174865723, "rewards_train/margins_2": 0.34621524810791016, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -162.5592041015625, "logps_train/policy_1_l": -129.99169921875, "logps_train/policy_1_w": -133.28961181640625, "logps_train/policy_2_2": -132.16903686523438, "logps_train/policy_2_w": -168.95904541015625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.270641803741455, "rewards_train/1-l": -1.6437007188796997, "rewards_train/1-w": 2.7405710220336914, "rewards_train/2-2": 2.4077064990997314, "rewards_train/2-w": 1.502532958984375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.384271740913391, "rewards_train/margins_1": 1.4699292182922363, "rewards_train/margins_2": 0.9051735401153564, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -178.45118713378906, "logps_train/policy_1_l": -166.09283447265625, "logps_train/policy_1_w": -150.5165557861328, "logps_train/policy_2_2": -134.18228149414062, "logps_train/policy_2_w": -184.10052490234375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2716774940490723, "rewards_train/1-l": -1.3093812465667725, "rewards_train/1-w": 2.95381236076355, "rewards_train/2-2": 2.3903660774230957, "rewards_train/2-w": 1.736040472984314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.263193607330322, "rewards_train/margins_1": 1.6821348667144775, "rewards_train/margins_2": 0.6543256044387817, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -130.58612060546875, "logps_train/policy_1_l": -160.53274536132812, "logps_train/policy_1_w": -90.78346252441406, "logps_train/policy_2_2": -109.68013763427734, "logps_train/policy_2_w": -111.52861022949219, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 0.8273252844810486, "rewards_train/1-l": -1.6040549278259277, "rewards_train/1-w": 1.9021222591400146, "rewards_train/2-2": 1.7069865465164185, "rewards_train/2-w": 1.2959675788879395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5061771869659424, "rewards_train/margins_1": 1.074796974658966, "rewards_train/margins_2": 0.411018967628479, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -121.45352935791016, "logps_train/policy_1_l": -98.05052185058594, "logps_train/policy_1_w": -108.46717834472656, "logps_train/policy_2_2": -104.95144653320312, "logps_train/policy_2_w": -138.57861328125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2491788864135742, "rewards_train/1-l": -1.2870841026306152, "rewards_train/1-w": 2.711277484893799, "rewards_train/2-2": 1.835714340209961, "rewards_train/2-w": 1.3919432163238525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.998361587524414, "rewards_train/margins_1": 1.4620985984802246, "rewards_train/margins_2": 0.4437711238861084, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -139.242431640625, "logps_train/policy_1_l": -129.638671875, "logps_train/policy_1_w": -115.19093322753906, "logps_train/policy_2_2": -109.07392120361328, "logps_train/policy_2_w": -149.71287536621094, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.3909921646118164, "rewards_train/1-l": -1.6891615390777588, "rewards_train/1-w": 3.830516815185547, "rewards_train/2-2": 2.321514129638672, "rewards_train/2-w": 2.5037121772766113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.519678354263306, "rewards_train/margins_1": 2.4395246505737305, "rewards_train/margins_2": -0.18219804763793945, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -160.79737854003906, "logps_train/policy_1_l": -123.24876403808594, "logps_train/policy_1_w": -97.4517593383789, "logps_train/policy_2_2": -124.88272857666016, "logps_train/policy_2_w": -114.53624725341797, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.0796369314193726, "rewards_train/1-l": -1.4157451391220093, "rewards_train/1-w": 1.8888087272644043, "rewards_train/2-2": 2.1132893562316895, "rewards_train/2-w": 1.191298007965088, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3045538663864136, "rewards_train/margins_1": 0.8091717958450317, "rewards_train/margins_2": 0.9219913482666016, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -64.99441528320312, "logps_train/policy_1_l": -82.33790588378906, "logps_train/policy_1_w": -89.3471450805664, "logps_train/policy_2_2": -53.03550338745117, "logps_train/policy_2_w": -108.07797241210938, "logps_train/ref_1_2": -69.5, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -59.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.45036303997039795, "rewards_train/1-l": -1.0626975297927856, "rewards_train/1-w": 1.756300926208496, "rewards_train/2-2": 0.6382464170455933, "rewards_train/2-w": 0.9922025203704834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8189984560012817, "rewards_train/margins_1": 1.3059378862380981, "rewards_train/margins_2": -0.35395610332489014, "step": 305 }, { "epoch": 0.92, "learning_rate": 3.0631797735484877e-06, "loss": 0.7721, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -96.3830795288086, "logps_train/policy_1_l": -121.45771789550781, "logps_train/policy_1_w": -59.90666961669922, "logps_train/policy_2_2": -70.33142852783203, "logps_train/policy_2_w": -79.53897857666016, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 0.784446120262146, "rewards_train/1-l": -2.1824898719787598, "rewards_train/1-w": 1.6210520267486572, "rewards_train/2-2": 1.7962514162063599, "rewards_train/2-w": 1.0701255798339844, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.803541898727417, "rewards_train/margins_1": 0.8366059064865112, "rewards_train/margins_2": 0.7261258363723755, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -222.92332458496094, "logps_train/policy_1_l": -187.78839111328125, "logps_train/policy_1_w": -172.40948486328125, "logps_train/policy_2_2": -182.2009735107422, "logps_train/policy_2_w": -215.71214294433594, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 2.2561049461364746, "rewards_train/1-l": -2.242706298828125, "rewards_train/1-w": 3.2824888229370117, "rewards_train/2-2": 3.8158392906188965, "rewards_train/2-w": 1.9600363969802856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.525195121765137, "rewards_train/margins_1": 1.026383876800537, "rewards_train/margins_2": 1.8558028936386108, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -58.362274169921875, "logps_train/policy_1_l": -97.87252044677734, "logps_train/policy_1_w": -106.84474182128906, "logps_train/policy_2_2": -48.147987365722656, "logps_train/policy_2_w": -137.86480712890625, "logps_train/ref_1_2": -63.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -57.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.4797879755496979, "rewards_train/1-l": -1.5521936416625977, "rewards_train/1-w": 1.8477528095245361, "rewards_train/2-2": 0.8819787502288818, "rewards_train/2-w": 0.4782661497592926, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.399946451187134, "rewards_train/margins_1": 1.3679648339748383, "rewards_train/margins_2": 0.40371260046958923, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -140.2711944580078, "logps_train/policy_1_l": -131.60423278808594, "logps_train/policy_1_w": -96.32351684570312, "logps_train/policy_2_2": -108.69395446777344, "logps_train/policy_2_w": -147.3738555908203, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.1738571971654892, "rewards_train/1-l": -1.3961889743804932, "rewards_train/1-w": 2.206125259399414, "rewards_train/2-2": 1.352869987487793, "rewards_train/2-w": 0.5762861967086792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6023142337799072, "rewards_train/margins_1": 2.032268062233925, "rewards_train/margins_2": 0.7765837907791138, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -123.91139221191406, "logps_train/policy_1_l": -185.6494140625, "logps_train/policy_1_w": -81.71731567382812, "logps_train/policy_2_2": -100.86509704589844, "logps_train/policy_2_w": -107.99050903320312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.6244856119155884, "rewards_train/1-l": -2.6551761627197266, "rewards_train/1-w": 2.1277308464050293, "rewards_train/2-2": 2.5384902954101562, "rewards_train/2-w": 1.3988981246948242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.782907009124756, "rewards_train/margins_1": 0.5032452344894409, "rewards_train/margins_2": 1.139592170715332, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -143.65756225585938, "logps_train/policy_1_l": -138.14976501464844, "logps_train/policy_1_w": -144.95018005371094, "logps_train/policy_2_2": -127.04195404052734, "logps_train/policy_2_w": -167.63442993164062, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.0279934406280518, "rewards_train/1-l": -1.3139023780822754, "rewards_train/1-w": 1.781837821006775, "rewards_train/2-2": 2.2583041191101074, "rewards_train/2-w": 0.753354012966156, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0957401990890503, "rewards_train/margins_1": -0.24615561962127686, "rewards_train/margins_2": 1.5049501061439514, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -160.62313842773438, "logps_train/policy_1_l": -149.4665069580078, "logps_train/policy_1_w": -107.58300018310547, "logps_train/policy_2_2": -126.63058471679688, "logps_train/policy_2_w": -146.33151245117188, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.5298726558685303, "rewards_train/1-l": -1.6966508626937866, "rewards_train/1-w": 2.649512767791748, "rewards_train/2-2": 1.5525665283203125, "rewards_train/2-w": 1.2230998277664185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.346163630485535, "rewards_train/margins_1": 2.1196401119232178, "rewards_train/margins_2": 0.32946670055389404, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -242.9912567138672, "logps_train/policy_1_l": -199.24130249023438, "logps_train/policy_1_w": -192.2703399658203, "logps_train/policy_2_2": -211.719970703125, "logps_train/policy_2_w": -227.10888671875, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.128218173980713, "rewards_train/1-l": -1.716318130493164, "rewards_train/1-w": 2.490154266357422, "rewards_train/2-2": 2.4701902866363525, "rewards_train/2-w": 1.103173017501831, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.206472396850586, "rewards_train/margins_1": 1.361936092376709, "rewards_train/margins_2": 1.3670172691345215, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -102.63154602050781, "logps_train/policy_1_l": -65.30731964111328, "logps_train/policy_1_w": -73.39695739746094, "logps_train/policy_2_2": -76.06781768798828, "logps_train/policy_2_w": -100.95918273925781, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -57.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.9954389333724976, "rewards_train/1-l": -0.7913763523101807, "rewards_train/1-w": 1.8579601049423218, "rewards_train/2-2": 1.966265320777893, "rewards_train/2-w": 0.8408012390136719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.6493364572525024, "rewards_train/margins_1": 0.8625211715698242, "rewards_train/margins_2": 1.1254640817642212, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -173.36227416992188, "logps_train/policy_1_l": -175.12594604492188, "logps_train/policy_1_w": -103.06266784667969, "logps_train/policy_2_2": -137.51736450195312, "logps_train/policy_2_w": -129.47877502441406, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.0364289283752441, "rewards_train/1-l": -1.759469985961914, "rewards_train/1-w": 1.7374838590621948, "rewards_train/2-2": 2.3306853771209717, "rewards_train/2-w": 1.1896226406097412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.496953845024109, "rewards_train/margins_1": 0.7010549306869507, "rewards_train/margins_2": 1.1410627365112305, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -139.86976623535156, "logps_train/policy_1_l": -126.90251159667969, "logps_train/policy_1_w": -146.603515625, "logps_train/policy_2_2": -109.77108001708984, "logps_train/policy_2_w": -188.08815002441406, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.3837268352508545, "rewards_train/1-l": -2.0289225578308105, "rewards_train/1-w": 2.3103525638580322, "rewards_train/2-2": 2.2389075756073, "rewards_train/2-w": 0.6130596995353699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.339275121688843, "rewards_train/margins_1": 0.9266257286071777, "rewards_train/margins_2": 1.62584787607193, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -155.30592346191406, "logps_train/policy_1_l": -199.5709686279297, "logps_train/policy_1_w": -126.94355010986328, "logps_train/policy_2_2": -128.44461059570312, "logps_train/policy_2_w": -156.436767578125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.8053451776504517, "rewards_train/1-l": -1.7020184993743896, "rewards_train/1-w": 1.9357225894927979, "rewards_train/2-2": 1.7008506059646606, "rewards_train/2-w": 1.1516356468200684, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6377410888671875, "rewards_train/margins_1": 1.1303774118423462, "rewards_train/margins_2": 0.5492149591445923, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -50.61190414428711, "logps_train/policy_1_l": -116.94449615478516, "logps_train/policy_1_w": -67.84614562988281, "logps_train/policy_2_2": -40.938289642333984, "logps_train/policy_2_w": -85.01437377929688, "logps_train/ref_1_2": -54.5, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -47.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.38343876600265503, "rewards_train/1-l": -1.3835115432739258, "rewards_train/1-w": 1.2821824550628662, "rewards_train/2-2": 0.6017765998840332, "rewards_train/2-w": 0.6856725811958313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.665693998336792, "rewards_train/margins_1": 0.8987436890602112, "rewards_train/margins_2": -0.0838959813117981, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -120.2739486694336, "logps_train/policy_1_l": -141.0323486328125, "logps_train/policy_1_w": -72.77129364013672, "logps_train/policy_2_2": -98.29859924316406, "logps_train/policy_2_w": -99.02166748046875, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.9856913089752197, "rewards_train/1-l": -1.5946420431137085, "rewards_train/1-w": 1.7122260332107544, "rewards_train/2-2": 1.8898661136627197, "rewards_train/2-w": 1.0036927461624146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.306868076324463, "rewards_train/margins_1": 0.7265347242355347, "rewards_train/margins_2": 0.8861733675003052, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -58.90254211425781, "logps_train/policy_1_l": -96.84423828125, "logps_train/policy_1_w": -63.23208999633789, "logps_train/policy_2_2": -44.553863525390625, "logps_train/policy_2_w": -87.70498657226562, "logps_train/ref_1_2": -63.5, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -52.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 0.48152363300323486, "rewards_train/1-l": -1.3337405920028687, "rewards_train/1-w": 1.4990568161010742, "rewards_train/2-2": 0.7286957502365112, "rewards_train/2-w": 0.9486420154571533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.832797408103943, "rewards_train/margins_1": 1.0175331830978394, "rewards_train/margins_2": -0.2199462652206421, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -212.3121337890625, "logps_train/policy_1_l": -173.24070739746094, "logps_train/policy_1_w": -115.04005432128906, "logps_train/policy_2_2": -167.776123046875, "logps_train/policy_2_w": -155.60321044921875, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.0000351667404175, "rewards_train/1-l": -1.4904773235321045, "rewards_train/1-w": 3.0741193294525146, "rewards_train/2-2": 2.6833252906799316, "rewards_train/2-w": 1.6928050518035889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.564596652984619, "rewards_train/margins_1": 2.074084162712097, "rewards_train/margins_2": 0.9905202388763428, "step": 307 }, { "epoch": 0.92, "learning_rate": 3.0390894672914427e-06, "loss": 0.7224, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -96.73921203613281, "logps_train/policy_1_l": -180.68763732910156, "logps_train/policy_1_w": -111.18408203125, "logps_train/policy_2_2": -84.54228973388672, "logps_train/policy_2_w": -127.08316040039062, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.6323292255401611, "rewards_train/1-l": -1.7242316007614136, "rewards_train/1-w": 0.7612794637680054, "rewards_train/2-2": 0.9692084193229675, "rewards_train/2-w": 0.3721526265144348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.485511064529419, "rewards_train/margins_1": 0.12895023822784424, "rewards_train/margins_2": 0.5970557928085327, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -131.6038818359375, "logps_train/policy_1_l": -125.7738037109375, "logps_train/policy_1_w": -76.44388580322266, "logps_train/policy_2_2": -104.78876495361328, "logps_train/policy_2_w": -101.45358276367188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.4114875793457031, "rewards_train/1-l": -1.8660520315170288, "rewards_train/1-w": 2.018892526626587, "rewards_train/2-2": 2.2953429222106934, "rewards_train/2-w": 1.281204104423523, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8849445581436157, "rewards_train/margins_1": 0.6074049472808838, "rewards_train/margins_2": 1.0141388177871704, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -109.48619842529297, "logps_train/policy_1_l": -126.1952133178711, "logps_train/policy_1_w": -69.25898742675781, "logps_train/policy_2_2": -86.98081970214844, "logps_train/policy_2_w": -87.73104095458984, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.056067943572998, "rewards_train/1-l": -1.8656151294708252, "rewards_train/1-w": 1.930741548538208, "rewards_train/2-2": 1.4648079872131348, "rewards_train/2-w": 1.0497475862503052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.796356678009033, "rewards_train/margins_1": 0.87467360496521, "rewards_train/margins_2": 0.4150604009628296, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -187.74423217773438, "logps_train/policy_1_l": -124.07904052734375, "logps_train/policy_1_w": -169.44528198242188, "logps_train/policy_2_2": -138.0185089111328, "logps_train/policy_2_w": -211.9127197265625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 0.6115154027938843, "rewards_train/1-l": -1.709503412246704, "rewards_train/1-w": 2.360548973083496, "rewards_train/2-2": 2.5645546913146973, "rewards_train/2-w": 0.7665396928787231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.0700523853302, "rewards_train/margins_1": 1.7490335702896118, "rewards_train/margins_2": 1.7980149984359741, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -77.81333923339844, "logps_train/policy_1_l": -83.13723754882812, "logps_train/policy_1_w": -82.87229919433594, "logps_train/policy_2_2": -54.657249450683594, "logps_train/policy_2_w": -113.778564453125, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -71.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.7139783501625061, "rewards_train/1-l": -0.5799342393875122, "rewards_train/1-w": 0.987769365310669, "rewards_train/2-2": 1.6584941148757935, "rewards_train/2-w": 0.4315189719200134, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.5677036046981812, "rewards_train/margins_1": 0.27379101514816284, "rewards_train/margins_2": 1.22697514295578, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -194.01980590820312, "logps_train/policy_1_l": -204.70706176757812, "logps_train/policy_1_w": -157.43267822265625, "logps_train/policy_2_2": -173.4619903564453, "logps_train/policy_2_w": -195.7080535888672, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.058957099914551, "rewards_train/1-l": -3.5824263095855713, "rewards_train/1-w": 2.5711851119995117, "rewards_train/2-2": 2.604581832885742, "rewards_train/2-w": 1.0135705471038818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.153611421585083, "rewards_train/margins_1": 0.5122280120849609, "rewards_train/margins_2": 1.5910112857818604, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -64.23202514648438, "logps_train/policy_1_l": -79.69767761230469, "logps_train/policy_1_w": -45.28871154785156, "logps_train/policy_2_2": -51.23137283325195, "logps_train/policy_2_w": -66.78308868408203, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -66.0, "logps_train/ref_2_w": -73.0, "rewards_train/1-2": 1.0478914976119995, "rewards_train/1-l": -1.405118703842163, "rewards_train/1-w": 1.3910505771636963, "rewards_train/2-2": 1.472175121307373, "rewards_train/2-w": 0.656456708908081, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7961692810058594, "rewards_train/margins_1": 0.3431590795516968, "rewards_train/margins_2": 0.815718412399292, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -140.08055114746094, "logps_train/policy_1_l": -193.81362915039062, "logps_train/policy_1_w": -144.7334747314453, "logps_train/policy_2_2": -110.49594116210938, "logps_train/policy_2_w": -192.6622314453125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.560694694519043, "rewards_train/1-l": -2.180654525756836, "rewards_train/1-w": 2.7305593490600586, "rewards_train/2-2": 2.5597808361053467, "rewards_train/2-w": 1.161315679550171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9112138748168945, "rewards_train/margins_1": 1.1698646545410156, "rewards_train/margins_2": 1.3984651565551758, "step": 308 }, { "epoch": 0.93, "logps_train/policy_1_2": -114.41064453125, "logps_train/policy_1_l": -80.5401611328125, "logps_train/policy_1_w": -80.17982482910156, "logps_train/policy_2_2": -79.30007934570312, "logps_train/policy_2_w": -111.7470703125, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.271435260772705, "rewards_train/1-l": -0.7930787801742554, "rewards_train/1-w": 1.6867051124572754, "rewards_train/2-2": 1.9324921369552612, "rewards_train/2-w": 0.5909179449081421, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4797838926315308, "rewards_train/margins_1": 0.4152698516845703, "rewards_train/margins_2": 1.3415741920471191, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -129.00796508789062, "logps_train/policy_1_l": -155.11378479003906, "logps_train/policy_1_w": -164.23817443847656, "logps_train/policy_2_2": -115.06793212890625, "logps_train/policy_2_w": -194.97946166992188, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.632798194885254, "rewards_train/1-l": -0.9949721097946167, "rewards_train/1-w": 2.4730582237243652, "rewards_train/2-2": 1.960590124130249, "rewards_train/2-w": 1.6436550617218018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.468030333518982, "rewards_train/margins_1": 0.8402600288391113, "rewards_train/margins_2": 0.31693506240844727, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -162.06358337402344, "logps_train/policy_1_l": -219.60614013671875, "logps_train/policy_1_w": -134.9057159423828, "logps_train/policy_2_2": -140.09048461914062, "logps_train/policy_2_w": -162.74473571777344, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.6467667818069458, "rewards_train/1-l": -2.388152837753296, "rewards_train/1-w": 2.710209369659424, "rewards_train/2-2": 2.5690767765045166, "rewards_train/2-w": 1.7286512851715088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.09836220741272, "rewards_train/margins_1": 1.063442587852478, "rewards_train/margins_2": 0.8404254913330078, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -182.29736328125, "logps_train/policy_1_l": -212.04867553710938, "logps_train/policy_1_w": -143.43222045898438, "logps_train/policy_2_2": -143.99449157714844, "logps_train/policy_2_w": -190.9320068359375, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.0624526739120483, "rewards_train/1-l": -2.990804672241211, "rewards_train/1-w": 2.854435682296753, "rewards_train/2-2": 2.281801223754883, "rewards_train/2-w": 1.0724246501922607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.845240354537964, "rewards_train/margins_1": 1.7919830083847046, "rewards_train/margins_2": 1.209376573562622, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -207.6182861328125, "logps_train/policy_1_l": -201.36819458007812, "logps_train/policy_1_w": -212.84652709960938, "logps_train/policy_2_2": -170.16455078125, "logps_train/policy_2_w": -272.4759521484375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.3912975788116455, "rewards_train/1-l": -2.164945363998413, "rewards_train/1-w": 3.4278464317321777, "rewards_train/2-2": 2.7147951126098633, "rewards_train/2-w": 0.83990478515625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.592791795730591, "rewards_train/margins_1": 2.0365488529205322, "rewards_train/margins_2": 1.8748903274536133, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -128.35728454589844, "logps_train/policy_1_l": -95.14996337890625, "logps_train/policy_1_w": -74.10005187988281, "logps_train/policy_2_2": -87.40784454345703, "logps_train/policy_2_w": -109.9390869140625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 0.5662242770195007, "rewards_train/1-l": -1.437848448753357, "rewards_train/1-w": 1.565775752067566, "rewards_train/2-2": 1.7015986442565918, "rewards_train/2-w": 0.4451536536216736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.003624200820923, "rewards_train/margins_1": 0.9995514750480652, "rewards_train/margins_2": 1.2564449906349182, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -108.80418395996094, "logps_train/policy_1_l": -117.78894805908203, "logps_train/policy_1_w": -84.36039733886719, "logps_train/policy_2_2": -86.92425537109375, "logps_train/policy_2_w": -112.07200622558594, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.437550663948059, "rewards_train/1-l": -1.2933478355407715, "rewards_train/1-w": 2.0687448978424072, "rewards_train/2-2": 2.165386199951172, "rewards_train/2-w": 0.9465103149414062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3620927333831787, "rewards_train/margins_1": 0.6311942338943481, "rewards_train/margins_2": 1.2188758850097656, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -97.34053039550781, "logps_train/policy_1_l": -117.46568298339844, "logps_train/policy_1_w": -96.53225708007812, "logps_train/policy_2_2": -77.72575378417969, "logps_train/policy_2_w": -123.19854736328125, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.36516547203063965, "rewards_train/1-l": -2.665318489074707, "rewards_train/1-w": 1.6709930896759033, "rewards_train/2-2": 0.976252555847168, "rewards_train/2-w": 0.5160832405090332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.33631157875061, "rewards_train/margins_1": 1.3058276176452637, "rewards_train/margins_2": 0.46016931533813477, "step": 309 }, { "epoch": 0.93, "learning_rate": 3.014946546852746e-06, "loss": 0.7525, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -182.576416015625, "logps_train/policy_1_l": -136.6092529296875, "logps_train/policy_1_w": -143.71633911132812, "logps_train/policy_2_2": -146.53810119628906, "logps_train/policy_2_w": -172.6829833984375, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 0.4579828381538391, "rewards_train/1-l": -0.6726435422897339, "rewards_train/1-w": 2.2627413272857666, "rewards_train/2-2": 1.5844717025756836, "rewards_train/2-w": 1.2067019939422607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9353848695755005, "rewards_train/margins_1": 1.8047584891319275, "rewards_train/margins_2": 0.37776970863342285, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -149.044921875, "logps_train/policy_1_l": -135.24208068847656, "logps_train/policy_1_w": -120.02056884765625, "logps_train/policy_2_2": -125.93338012695312, "logps_train/policy_2_w": -147.4561004638672, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4884767532348633, "rewards_train/1-l": -1.402137279510498, "rewards_train/1-w": 2.1721620559692383, "rewards_train/2-2": 2.346505641937256, "rewards_train/2-w": 1.2504832744598389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5742993354797363, "rewards_train/margins_1": 0.683685302734375, "rewards_train/margins_2": 1.096022367477417, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -233.42971801757812, "logps_train/policy_1_l": -204.61138916015625, "logps_train/policy_1_w": -190.76451110839844, "logps_train/policy_2_2": -186.3468780517578, "logps_train/policy_2_w": -250.30625915527344, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 1.732027292251587, "rewards_train/1-l": -2.268951654434204, "rewards_train/1-w": 3.948549509048462, "rewards_train/2-2": 3.9153125286102295, "rewards_train/2-w": 1.6599998474121094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.217501163482666, "rewards_train/margins_1": 2.216522216796875, "rewards_train/margins_2": 2.25531268119812, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -182.3568572998047, "logps_train/policy_1_l": -123.75170135498047, "logps_train/policy_1_w": -106.31803131103516, "logps_train/policy_2_2": -141.9996795654297, "logps_train/policy_2_w": -137.0445556640625, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.0885326862335205, "rewards_train/1-l": -1.2530450820922852, "rewards_train/1-w": 2.2688376903533936, "rewards_train/2-2": 2.9203438758850098, "rewards_train/2-w": 1.1224976778030396, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5218827724456787, "rewards_train/margins_1": 1.180305004119873, "rewards_train/margins_2": 1.7978461980819702, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -154.67111206054688, "logps_train/policy_1_l": -192.1038818359375, "logps_train/policy_1_w": -136.45619201660156, "logps_train/policy_2_2": -134.42495727539062, "logps_train/policy_2_w": -152.24935913085938, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.72409987449646, "rewards_train/1-l": -1.361560344696045, "rewards_train/1-w": 1.6575065851211548, "rewards_train/2-2": 2.4098472595214844, "rewards_train/2-w": 1.0707669258117676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0190669298171997, "rewards_train/margins_1": -0.06659328937530518, "rewards_train/margins_2": 1.3390803337097168, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -175.42898559570312, "logps_train/policy_1_l": -190.9688262939453, "logps_train/policy_1_w": -116.64669799804688, "logps_train/policy_2_2": -149.07244873046875, "logps_train/policy_2_w": -146.2803955078125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.471165418624878, "rewards_train/1-l": -2.671882152557373, "rewards_train/1-w": 2.160329818725586, "rewards_train/2-2": 2.2794747352600098, "rewards_train/2-w": 1.198523998260498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.832211971282959, "rewards_train/margins_1": 0.689164400100708, "rewards_train/margins_2": 1.0809507369995117, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -135.39898681640625, "logps_train/policy_1_l": -153.39047241210938, "logps_train/policy_1_w": -167.0653076171875, "logps_train/policy_2_2": -111.01766204833984, "logps_train/policy_2_w": -196.61065673828125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8120547533035278, "rewards_train/1-l": -1.9089689254760742, "rewards_train/1-w": 2.5465948581695557, "rewards_train/2-2": 2.580068826675415, "rewards_train/2-w": 1.1561217308044434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.45556378364563, "rewards_train/margins_1": 0.7345401048660278, "rewards_train/margins_2": 1.4239470958709717, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -132.69644165039062, "logps_train/policy_1_l": -160.8573455810547, "logps_train/policy_1_w": -89.81074523925781, "logps_train/policy_2_2": -103.60992431640625, "logps_train/policy_2_w": -119.12452697753906, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.8006669282913208, "rewards_train/1-l": -2.6236257553100586, "rewards_train/1-w": 2.247440814971924, "rewards_train/2-2": 1.706976056098938, "rewards_train/2-w": 1.1879377365112305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.871066570281982, "rewards_train/margins_1": 1.446773886680603, "rewards_train/margins_2": 0.5190383195877075, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -232.8946533203125, "logps_train/policy_1_l": -183.04031372070312, "logps_train/policy_1_w": -136.42266845703125, "logps_train/policy_2_2": -192.32618713378906, "logps_train/policy_2_w": -176.37310791015625, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.013660192489624, "rewards_train/1-l": -1.7102807760238647, "rewards_train/1-w": 3.0921082496643066, "rewards_train/2-2": 3.581444025039673, "rewards_train/2-w": 1.6626884937286377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.802389025688171, "rewards_train/margins_1": 1.0784480571746826, "rewards_train/margins_2": 1.9187555313110352, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -106.92280578613281, "logps_train/policy_1_l": -140.83285522460938, "logps_train/policy_1_w": -78.79437255859375, "logps_train/policy_2_2": -68.71199035644531, "logps_train/policy_2_w": -105.58666229248047, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.47334474325180054, "rewards_train/1-l": -1.0820159912109375, "rewards_train/1-w": 1.9161685705184937, "rewards_train/2-2": 1.116642951965332, "rewards_train/2-w": 0.9503180980682373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.998184561729431, "rewards_train/margins_1": 1.4428238272666931, "rewards_train/margins_2": 0.16632485389709473, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -130.17816162109375, "logps_train/policy_1_l": -113.34528350830078, "logps_train/policy_1_w": -65.97608184814453, "logps_train/policy_2_2": -97.35791015625, "logps_train/policy_2_w": -102.26982116699219, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.7501511573791504, "rewards_train/1-l": -1.009626030921936, "rewards_train/1-w": 1.9016104936599731, "rewards_train/2-2": 2.09389591217041, "rewards_train/2-w": 1.008955478668213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.911236524581909, "rewards_train/margins_1": 1.1514593362808228, "rewards_train/margins_2": 1.0849404335021973, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -141.4906768798828, "logps_train/policy_1_l": -103.75302124023438, "logps_train/policy_1_w": -94.55378723144531, "logps_train/policy_2_2": -110.8124771118164, "logps_train/policy_2_w": -116.90463256835938, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.7665570974349976, "rewards_train/1-l": -1.537021517753601, "rewards_train/1-w": 1.122746229171753, "rewards_train/2-2": 1.7421895265579224, "rewards_train/2-w": 0.32516220211982727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.659767746925354, "rewards_train/margins_1": 0.35618913173675537, "rewards_train/margins_2": 1.417027324438095, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -225.61328125, "logps_train/policy_1_l": -159.27584838867188, "logps_train/policy_1_w": -187.15814208984375, "logps_train/policy_2_2": -175.629638671875, "logps_train/policy_2_w": -248.95069885253906, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 0.8136706352233887, "rewards_train/1-l": -2.234421968460083, "rewards_train/1-w": 4.195123672485352, "rewards_train/2-2": 2.796412944793701, "rewards_train/2-w": 2.3033666610717773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.429545640945435, "rewards_train/margins_1": 3.381453037261963, "rewards_train/margins_2": 0.49304628372192383, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -192.29823303222656, "logps_train/policy_1_l": -178.2716522216797, "logps_train/policy_1_w": -114.70069122314453, "logps_train/policy_2_2": -154.8614959716797, "logps_train/policy_2_w": -149.79458618164062, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.066271185874939, "rewards_train/1-l": -2.5396649837493896, "rewards_train/1-w": 2.4477052688598633, "rewards_train/2-2": 2.6574058532714844, "rewards_train/2-w": 1.289681315422058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.987370252609253, "rewards_train/margins_1": 1.3814340829849243, "rewards_train/margins_2": 1.3677245378494263, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -171.80361938476562, "logps_train/policy_1_l": -138.32965087890625, "logps_train/policy_1_w": -95.85685729980469, "logps_train/policy_2_2": -132.27401733398438, "logps_train/policy_2_w": -117.84306335449219, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.63057541847229, "rewards_train/1-l": -2.0626535415649414, "rewards_train/1-w": 2.079939603805542, "rewards_train/2-2": 2.382754325866699, "rewards_train/2-w": 1.1305370330810547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.142593145370483, "rewards_train/margins_1": 1.449364185333252, "rewards_train/margins_2": 1.2522172927856445, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -170.44073486328125, "logps_train/policy_1_l": -147.4195556640625, "logps_train/policy_1_w": -117.78353881835938, "logps_train/policy_2_2": -136.08584594726562, "logps_train/policy_2_w": -147.3515167236328, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.944988489151001, "rewards_train/1-l": -1.639415979385376, "rewards_train/1-w": 1.84293532371521, "rewards_train/2-2": 2.3171958923339844, "rewards_train/2-w": 1.1855506896972656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.482351303100586, "rewards_train/margins_1": 0.897946834564209, "rewards_train/margins_2": 1.1316452026367188, "step": 311 }, { "epoch": 0.93, "learning_rate": 2.990753368538872e-06, "loss": 0.6658, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -134.14183044433594, "logps_train/policy_1_l": -148.23336791992188, "logps_train/policy_1_w": -77.79020690917969, "logps_train/policy_2_2": -99.83889770507812, "logps_train/policy_2_w": -105.21387481689453, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.4076923131942749, "rewards_train/1-l": -1.723726511001587, "rewards_train/1-w": 1.6780104637145996, "rewards_train/2-2": 1.6403288841247559, "rewards_train/2-w": 0.9129879474639893, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4017369747161865, "rewards_train/margins_1": 1.2703181505203247, "rewards_train/margins_2": 0.7273409366607666, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -191.89732360839844, "logps_train/policy_1_l": -181.6610565185547, "logps_train/policy_1_w": -133.96188354492188, "logps_train/policy_2_2": -144.74139404296875, "logps_train/policy_2_w": -185.61788940429688, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.047767996788025, "rewards_train/1-l": -1.618059515953064, "rewards_train/1-w": 2.163187026977539, "rewards_train/2-2": 1.9414856433868408, "rewards_train/2-w": 0.5694610476493835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.781246542930603, "rewards_train/margins_1": 1.1154190301895142, "rewards_train/margins_2": 1.3720245957374573, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -193.67616271972656, "logps_train/policy_1_l": -209.8723907470703, "logps_train/policy_1_w": -177.4775390625, "logps_train/policy_2_2": -162.86190795898438, "logps_train/policy_2_w": -230.3917694091797, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.2136340141296387, "rewards_train/1-l": -3.020051956176758, "rewards_train/1-w": 3.876464366912842, "rewards_train/2-2": 3.229434013366699, "rewards_train/2-w": 2.102229595184326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.8965163230896, "rewards_train/margins_1": 1.6628303527832031, "rewards_train/margins_2": 1.127204418182373, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -173.8748779296875, "logps_train/policy_1_l": -160.14663696289062, "logps_train/policy_1_w": -176.0931396484375, "logps_train/policy_2_2": -142.8766326904297, "logps_train/policy_2_w": -212.1327667236328, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.7750117778778076, "rewards_train/1-l": -1.127943754196167, "rewards_train/1-w": 3.2289676666259766, "rewards_train/2-2": 3.0029613971710205, "rewards_train/2-w": 1.7710986137390137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.3569114208221436, "rewards_train/margins_1": 1.453955888748169, "rewards_train/margins_2": 1.2318627834320068, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -202.6592559814453, "logps_train/policy_1_l": -234.9813995361328, "logps_train/policy_1_w": -180.3328857421875, "logps_train/policy_2_2": -147.72645568847656, "logps_train/policy_2_w": -232.40390014648438, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.2903249263763428, "rewards_train/1-l": -2.731344223022461, "rewards_train/1-w": 2.985461711883545, "rewards_train/2-2": 2.7586042881011963, "rewards_train/2-w": 1.0158607959747314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.716805934906006, "rewards_train/margins_1": 1.6951367855072021, "rewards_train/margins_2": 1.7427434921264648, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -194.62606811523438, "logps_train/policy_1_l": -161.70079040527344, "logps_train/policy_1_w": -113.90794372558594, "logps_train/policy_2_2": -151.09136962890625, "logps_train/policy_2_w": -133.81661987304688, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.0373942852020264, "rewards_train/1-l": -2.599766731262207, "rewards_train/1-w": 2.249830722808838, "rewards_train/2-2": 3.0893008708953857, "rewards_train/2-w": 1.3902130126953125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.849597454071045, "rewards_train/margins_1": 1.2124364376068115, "rewards_train/margins_2": 1.6990878582000732, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -159.770751953125, "logps_train/policy_1_l": -116.8056640625, "logps_train/policy_1_w": -140.180908203125, "logps_train/policy_2_2": -121.52096557617188, "logps_train/policy_2_w": -184.1807098388672, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.9494870901107788, "rewards_train/1-l": -0.6308598518371582, "rewards_train/1-w": 2.4624757766723633, "rewards_train/2-2": 2.2326698303222656, "rewards_train/2-w": 0.9452111721038818, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0933356285095215, "rewards_train/margins_1": 1.5129886865615845, "rewards_train/margins_2": 1.2874586582183838, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -190.1265869140625, "logps_train/policy_1_l": -163.63670349121094, "logps_train/policy_1_w": -115.46070861816406, "logps_train/policy_2_2": -144.77423095703125, "logps_train/policy_2_w": -154.9233856201172, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.3264055252075195, "rewards_train/1-l": -2.4785139560699463, "rewards_train/1-w": 2.1851797103881836, "rewards_train/2-2": 3.1132020950317383, "rewards_train/2-w": 1.007660984992981, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.66369366645813, "rewards_train/margins_1": 0.8587741851806641, "rewards_train/margins_2": 2.1055411100387573, "step": 312 }, { "epoch": 0.94, "logps_train/policy_1_2": -107.61663818359375, "logps_train/policy_1_l": -97.39802551269531, "logps_train/policy_1_w": -70.1734390258789, "logps_train/policy_2_2": -83.23749542236328, "logps_train/policy_2_w": -96.24925994873047, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.1459532976150513, "rewards_train/1-l": -1.0774126052856445, "rewards_train/1-w": 1.5396876335144043, "rewards_train/2-2": 1.8635555505752563, "rewards_train/2-w": 1.1117925643920898, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.617100238800049, "rewards_train/margins_1": 0.393734335899353, "rewards_train/margins_2": 0.7517629861831665, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -161.29660034179688, "logps_train/policy_1_l": -187.79823303222656, "logps_train/policy_1_w": -111.97885131835938, "logps_train/policy_2_2": -133.8450164794922, "logps_train/policy_2_w": -137.05426025390625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2859656810760498, "rewards_train/1-l": -1.9438854455947876, "rewards_train/1-w": 1.7896143198013306, "rewards_train/2-2": 2.2904982566833496, "rewards_train/2-w": 1.2732847929000854, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.733499765396118, "rewards_train/margins_1": 0.5036486387252808, "rewards_train/margins_2": 1.0172134637832642, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -146.3782958984375, "logps_train/policy_1_l": -111.42328643798828, "logps_train/policy_1_w": -155.301025390625, "logps_train/policy_2_2": -126.43753051757812, "logps_train/policy_2_w": -181.09432983398438, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.073107361793518, "rewards_train/1-l": -0.7110787034034729, "rewards_train/1-w": 2.2808361053466797, "rewards_train/2-2": 1.6414035558700562, "rewards_train/2-w": 1.5858807563781738, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.9919148087501526, "rewards_train/margins_1": 1.2077287435531616, "rewards_train/margins_2": 0.055522799491882324, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -174.19931030273438, "logps_train/policy_1_l": -191.05650329589844, "logps_train/policy_1_w": -119.44253540039062, "logps_train/policy_2_2": -146.91156005859375, "logps_train/policy_2_w": -151.48355102539062, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.6331946849822998, "rewards_train/1-l": -1.9704935550689697, "rewards_train/1-w": 2.536997079849243, "rewards_train/2-2": 2.6010327339172363, "rewards_train/2-w": 1.642270565032959, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.507490634918213, "rewards_train/margins_1": 0.9038023948669434, "rewards_train/margins_2": 0.9587621688842773, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -169.0736541748047, "logps_train/policy_1_l": -185.71546936035156, "logps_train/policy_1_w": -155.19747924804688, "logps_train/policy_2_2": -151.7333984375, "logps_train/policy_2_w": -195.94918823242188, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.2652909755706787, "rewards_train/1-l": -1.0254533290863037, "rewards_train/1-w": 2.7337679862976074, "rewards_train/2-2": 2.797752857208252, "rewards_train/2-w": 1.9875023365020752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.759221315383911, "rewards_train/margins_1": 0.4684770107269287, "rewards_train/margins_2": 0.8102505207061768, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -132.17295837402344, "logps_train/policy_1_l": -151.73550415039062, "logps_train/policy_1_w": -81.50511169433594, "logps_train/policy_2_2": -103.85958862304688, "logps_train/policy_2_w": -106.12183380126953, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.789735734462738, "rewards_train/1-l": -2.370816230773926, "rewards_train/1-w": 1.665894627571106, "rewards_train/2-2": 1.8949004411697388, "rewards_train/2-w": 0.9526605010032654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.036710858345032, "rewards_train/margins_1": 0.8761588931083679, "rewards_train/margins_2": 0.9422399401664734, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -54.74090576171875, "logps_train/policy_1_l": -27.225223541259766, "logps_train/policy_1_w": -32.1856575012207, "logps_train/policy_2_2": -46.700164794921875, "logps_train/policy_2_w": -43.82804870605469, "logps_train/ref_1_2": -59.0, "logps_train/ref_1_l": -20.875, "logps_train/ref_1_w": -32.75, "logps_train/ref_2_2": -54.5, "logps_train/ref_2_w": -42.25, "rewards_train/1-2": 0.4270813465118408, "rewards_train/1-l": -0.6354131698608398, "rewards_train/1-w": 0.0650281012058258, "rewards_train/2-2": 0.7721708416938782, "rewards_train/2-w": -0.16171132028102875, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.7004412710666656, "rewards_train/margins_1": -0.362053245306015, "rewards_train/margins_2": 0.9338821619749069, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -99.94181060791016, "logps_train/policy_1_l": -111.24473571777344, "logps_train/policy_1_w": -73.90469360351562, "logps_train/policy_2_2": -73.58386993408203, "logps_train/policy_2_w": -99.66219329833984, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": 1.3980062007904053, "rewards_train/1-l": -1.8262314796447754, "rewards_train/1-w": 1.0233006477355957, "rewards_train/2-2": 1.8158317804336548, "rewards_train/2-w": 0.2628821134567261, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.849532127380371, "rewards_train/margins_1": -0.37470555305480957, "rewards_train/margins_2": 1.5529496669769287, "step": 313 }, { "epoch": 0.94, "learning_rate": 2.9665122935613727e-06, "loss": 0.7698, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -104.52978515625, "logps_train/policy_1_l": -95.4722900390625, "logps_train/policy_1_w": -68.14579772949219, "logps_train/policy_2_2": -73.9903335571289, "logps_train/policy_2_w": -108.43971252441406, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.6860837936401367, "rewards_train/1-l": -1.031213402748108, "rewards_train/1-w": 1.6643261909484863, "rewards_train/2-2": 1.4730126857757568, "rewards_train/2-w": 0.5228254795074463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6955395936965942, "rewards_train/margins_1": 0.9782423973083496, "rewards_train/margins_2": 0.9501872062683105, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -252.41464233398438, "logps_train/policy_1_l": -211.94924926757812, "logps_train/policy_1_w": -154.46286010742188, "logps_train/policy_2_2": -200.97482299804688, "logps_train/policy_2_w": -187.14691162109375, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 0.6616603136062622, "rewards_train/1-l": -2.2597694396972656, "rewards_train/1-w": 2.892777442932129, "rewards_train/2-2": 2.780642032623291, "rewards_train/2-w": 1.5712467432022095, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.1525468826293945, "rewards_train/margins_1": 2.2311171293258667, "rewards_train/margins_2": 1.2093952894210815, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -223.39028930664062, "logps_train/policy_1_l": -248.89199829101562, "logps_train/policy_1_w": -174.85067749023438, "logps_train/policy_2_2": -178.47097778320312, "logps_train/policy_2_w": -236.45126342773438, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.6922211647033691, "rewards_train/1-l": -2.4089272022247314, "rewards_train/1-w": 3.2164950370788574, "rewards_train/2-2": 3.3950893878936768, "rewards_train/2-w": 1.7361242771148682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.625422239303589, "rewards_train/margins_1": 1.5242738723754883, "rewards_train/margins_2": 1.6589651107788086, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -117.7701187133789, "logps_train/policy_1_l": -252.3519287109375, "logps_train/policy_1_w": -131.35269165039062, "logps_train/policy_2_2": -94.26740264892578, "logps_train/policy_2_w": -158.92469787597656, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.167909860610962, "rewards_train/1-l": -3.1815803050994873, "rewards_train/1-w": 2.0162932872772217, "rewards_train/2-2": 1.7056821584701538, "rewards_train/2-w": 0.8606551289558411, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.197873592376709, "rewards_train/margins_1": 0.8483834266662598, "rewards_train/margins_2": 0.8450270295143127, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -244.64871215820312, "logps_train/policy_1_l": -195.84841918945312, "logps_train/policy_1_w": -156.0831756591797, "logps_train/policy_2_2": -205.19464111328125, "logps_train/policy_2_w": -183.47227478027344, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.5320044159889221, "rewards_train/1-l": -2.6596479415893555, "rewards_train/1-w": 3.09480619430542, "rewards_train/2-2": 2.2258477210998535, "rewards_train/2-w": 2.1683976650238037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.754454135894775, "rewards_train/margins_1": 2.562801778316498, "rewards_train/margins_2": 0.057450056076049805, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -145.7650909423828, "logps_train/policy_1_l": -104.92537689208984, "logps_train/policy_1_w": -113.62641906738281, "logps_train/policy_2_2": -114.18246459960938, "logps_train/policy_2_w": -141.95144653320312, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.3594284057617188, "rewards_train/1-l": -1.0513266324996948, "rewards_train/1-w": 1.990483283996582, "rewards_train/2-2": 2.422379493713379, "rewards_train/2-w": 0.9111051559448242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.041809916496277, "rewards_train/margins_1": 0.6310548782348633, "rewards_train/margins_2": 1.5112743377685547, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -126.18959045410156, "logps_train/policy_1_l": -114.80067443847656, "logps_train/policy_1_w": -102.47013092041016, "logps_train/policy_2_2": -98.77513122558594, "logps_train/policy_2_w": -140.67706298828125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.123228907585144, "rewards_train/1-l": -1.8255757093429565, "rewards_train/1-w": 2.4701743125915527, "rewards_train/2-2": 1.7177993059158325, "rewards_train/2-w": 1.1002635955810547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.295750021934509, "rewards_train/margins_1": 1.3469454050064087, "rewards_train/margins_2": 0.6175357103347778, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -197.08897399902344, "logps_train/policy_1_l": -220.91293334960938, "logps_train/policy_1_w": -117.87881469726562, "logps_train/policy_2_2": -152.42396545410156, "logps_train/policy_2_w": -165.40365600585938, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.4770398139953613, "rewards_train/1-l": -3.146517753601074, "rewards_train/1-w": 2.9750094413757324, "rewards_train/2-2": 3.066197156906128, "rewards_train/2-w": 1.2971341609954834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.121527194976807, "rewards_train/margins_1": 1.497969627380371, "rewards_train/margins_2": 1.7690629959106445, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -161.88888549804688, "logps_train/policy_1_l": -107.1249008178711, "logps_train/policy_1_w": -122.29666137695312, "logps_train/policy_2_2": -133.5877685546875, "logps_train/policy_2_w": -152.53302001953125, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.2361114025115967, "rewards_train/1-l": -1.1328027248382568, "rewards_train/1-w": 2.0804896354675293, "rewards_train/2-2": 2.053722858428955, "rewards_train/2-w": 1.0271670818328857, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.213292360305786, "rewards_train/margins_1": 0.8443782329559326, "rewards_train/margins_2": 1.0265557765960693, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -189.35110473632812, "logps_train/policy_1_l": -171.63760375976562, "logps_train/policy_1_w": -173.25894165039062, "logps_train/policy_2_2": -142.09878540039062, "logps_train/policy_2_w": -235.55990600585938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.5883266925811768, "rewards_train/1-l": -2.3399319648742676, "rewards_train/1-w": 3.127230167388916, "rewards_train/2-2": 2.7729339599609375, "rewards_train/2-w": 0.8361979126930237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.467162132263184, "rewards_train/margins_1": 1.5389034748077393, "rewards_train/margins_2": 1.9367360472679138, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -183.52352905273438, "logps_train/policy_1_l": -171.71290588378906, "logps_train/policy_1_w": -102.01023864746094, "logps_train/policy_2_2": -148.5443878173828, "logps_train/policy_2_w": -131.41238403320312, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7855371236801147, "rewards_train/1-l": -1.4314467906951904, "rewards_train/1-w": 2.7349131107330322, "rewards_train/2-2": 2.249077320098877, "rewards_train/2-w": 2.0243866443634033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.166359901428223, "rewards_train/margins_1": 1.9493759870529175, "rewards_train/margins_2": 0.22469067573547363, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -101.0210952758789, "logps_train/policy_1_l": -155.8102264404297, "logps_train/policy_1_w": -115.04146575927734, "logps_train/policy_2_2": -78.34141540527344, "logps_train/policy_2_w": -140.013916015625, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.122890830039978, "rewards_train/1-l": -2.0632503032684326, "rewards_train/1-w": 2.1880407333374023, "rewards_train/2-2": 1.949452519416809, "rewards_train/2-w": 1.1673595905303955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.251291036605835, "rewards_train/margins_1": 1.0651499032974243, "rewards_train/margins_2": 0.7820929288864136, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -123.72335815429688, "logps_train/policy_1_l": -93.37503051757812, "logps_train/policy_1_w": -84.44708251953125, "logps_train/policy_2_2": -99.16454315185547, "logps_train/policy_2_w": -113.52488708496094, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8909450769424438, "rewards_train/1-l": -0.95664381980896, "rewards_train/1-w": 1.8552911281585693, "rewards_train/2-2": 1.5897960662841797, "rewards_train/2-w": 0.9689952731132507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.8119349479675293, "rewards_train/margins_1": 0.9643460512161255, "rewards_train/margins_2": 0.620800793170929, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -188.11477661132812, "logps_train/policy_1_l": -125.27723693847656, "logps_train/policy_1_w": -89.34779357910156, "logps_train/policy_2_2": -149.06581115722656, "logps_train/policy_2_w": -119.36707305908203, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.427583932876587, "rewards_train/1-l": -1.7099496126174927, "rewards_train/1-w": 2.3785016536712646, "rewards_train/2-2": 2.8574817180633545, "rewards_train/2-w": 1.6668086051940918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.088451266288757, "rewards_train/margins_1": 0.9509177207946777, "rewards_train/margins_2": 1.1906731128692627, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -265.4988708496094, "logps_train/policy_1_l": -162.47256469726562, "logps_train/policy_1_w": -185.9690399169922, "logps_train/policy_2_2": -208.27357482910156, "logps_train/policy_2_w": -231.86720275878906, "logps_train/ref_1_2": -286.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -245.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.0063631534576416, "rewards_train/1-l": -1.3184480667114258, "rewards_train/1-w": 4.028095722198486, "rewards_train/2-2": 3.6226439476013184, "rewards_train/2-w": 2.394528865814209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.346543788909912, "rewards_train/margins_1": 2.0217325687408447, "rewards_train/margins_2": 1.2281150817871094, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -71.57322692871094, "logps_train/policy_1_l": -89.07469177246094, "logps_train/policy_1_w": -35.99615478515625, "logps_train/policy_2_2": -57.70426559448242, "logps_train/policy_2_w": -46.251243591308594, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -48.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 0.6137715578079224, "rewards_train/1-l": -1.7904771566390991, "rewards_train/1-w": 1.2683534622192383, "rewards_train/2-2": 1.1092610359191895, "rewards_train/2-w": 0.855344295501709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0588306188583374, "rewards_train/margins_1": 0.6545819044113159, "rewards_train/margins_2": 0.25391674041748047, "step": 315 }, { "epoch": 0.95, "learning_rate": 2.9422256878064326e-06, "loss": 0.6371, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -131.56597900390625, "logps_train/policy_1_l": -168.4808349609375, "logps_train/policy_1_w": -117.60301208496094, "logps_train/policy_2_2": -102.912353515625, "logps_train/policy_2_w": -152.76617431640625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.23324668407440186, "rewards_train/1-l": -2.096911907196045, "rewards_train/1-w": 1.8996602296829224, "rewards_train/2-2": 0.9314212203025818, "rewards_train/2-w": 0.9671337604522705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9965721368789673, "rewards_train/margins_1": 1.6664135456085205, "rewards_train/margins_2": -0.03571254014968872, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -143.44418334960938, "logps_train/policy_1_l": -163.9179229736328, "logps_train/policy_1_w": -121.44609069824219, "logps_train/policy_2_2": -118.67807006835938, "logps_train/policy_2_w": -147.2510986328125, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.4196436405181885, "rewards_train/1-l": -2.409761667251587, "rewards_train/1-w": 1.8788278102874756, "rewards_train/2-2": 2.414419174194336, "rewards_train/2-w": 0.5600466132164001, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.2885894775390625, "rewards_train/margins_1": 0.4591841697692871, "rewards_train/margins_2": 1.8543725609779358, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -145.62261962890625, "logps_train/policy_1_l": -145.11965942382812, "logps_train/policy_1_w": -110.59799194335938, "logps_train/policy_2_2": -115.08834075927734, "logps_train/policy_2_w": -139.6046905517578, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.7510195374488831, "rewards_train/1-l": -2.065091609954834, "rewards_train/1-w": 2.4126617908477783, "rewards_train/2-2": 2.1864781379699707, "rewards_train/2-w": 1.5043742656707764, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.477753400802612, "rewards_train/margins_1": 1.6616422533988953, "rewards_train/margins_2": 0.6821038722991943, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -140.2473602294922, "logps_train/policy_1_l": -120.85681915283203, "logps_train/policy_1_w": -130.49658203125, "logps_train/policy_2_2": -105.46771240234375, "logps_train/policy_2_w": -169.51853942871094, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1158888339996338, "rewards_train/1-l": -1.9302129745483398, "rewards_train/1-w": 2.6476082801818848, "rewards_train/2-2": 2.26924467086792, "rewards_train/2-w": 1.0571305751800537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.577821254730225, "rewards_train/margins_1": 1.531719446182251, "rewards_train/margins_2": 1.2121140956878662, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -247.63507080078125, "logps_train/policy_1_l": -208.5357208251953, "logps_train/policy_1_w": -176.57168579101562, "logps_train/policy_2_2": -215.86793518066406, "logps_train/policy_2_w": -214.33560180664062, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 2.6614933013916016, "rewards_train/1-l": -2.1348233222961426, "rewards_train/1-w": 3.4178309440612793, "rewards_train/2-2": 3.678832530975342, "rewards_train/2-w": 2.16644024848938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.552654266357422, "rewards_train/margins_1": 0.7563376426696777, "rewards_train/margins_2": 1.512392282485962, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -78.92176818847656, "logps_train/policy_1_l": -78.63941955566406, "logps_train/policy_1_w": -81.0685043334961, "logps_train/policy_2_2": -64.27149963378906, "logps_train/policy_2_w": -104.1702880859375, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8313584327697754, "rewards_train/1-l": -0.6065196394920349, "rewards_train/1-w": 2.2870945930480957, "rewards_train/2-2": 1.2512195110321045, "rewards_train/2-w": 1.1980103254318237, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8936142325401306, "rewards_train/margins_1": 1.4557361602783203, "rewards_train/margins_2": 0.05320918560028076, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -176.06675720214844, "logps_train/policy_1_l": -276.8492736816406, "logps_train/policy_1_w": -158.44415283203125, "logps_train/policy_2_2": -137.0191650390625, "logps_train/policy_2_w": -205.99578857421875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.4636361598968506, "rewards_train/1-l": -3.2493815422058105, "rewards_train/1-w": 2.3126163482666016, "rewards_train/2-2": 2.8308956623077393, "rewards_train/2-w": 1.0519826412200928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.561997890472412, "rewards_train/margins_1": 0.848980188369751, "rewards_train/margins_2": 1.7789130210876465, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -164.97467041015625, "logps_train/policy_1_l": -209.72549438476562, "logps_train/policy_1_w": -146.65988159179688, "logps_train/policy_2_2": -127.2227783203125, "logps_train/policy_2_w": -208.7412567138672, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 0.5525321960449219, "rewards_train/1-l": -2.4475483894348145, "rewards_train/1-w": 3.428933620452881, "rewards_train/2-2": 1.508190393447876, "rewards_train/2-w": 1.1711866855621338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.876482009887695, "rewards_train/margins_1": 2.876401424407959, "rewards_train/margins_2": 0.3370037078857422, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -137.87738037109375, "logps_train/policy_1_l": -235.26551818847656, "logps_train/policy_1_w": -147.8489532470703, "logps_train/policy_2_2": -104.74057006835938, "logps_train/policy_2_w": -188.09820556640625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.2403879165649414, "rewards_train/1-l": -3.050380229949951, "rewards_train/1-w": 2.6072914600372314, "rewards_train/2-2": 2.490004539489746, "rewards_train/2-w": 1.0198655128479004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.657671689987183, "rewards_train/margins_1": 1.36690354347229, "rewards_train/margins_2": 1.4701390266418457, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -156.97409057617188, "logps_train/policy_1_l": -167.04995727539062, "logps_train/policy_1_w": -120.39918518066406, "logps_train/policy_2_2": -133.72329711914062, "logps_train/policy_2_w": -146.1060791015625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.308840274810791, "rewards_train/1-l": -1.8243306875228882, "rewards_train/1-w": 2.3632068634033203, "rewards_train/2-2": 2.424056053161621, "rewards_train/2-w": 1.5972055196762085, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.1875375509262085, "rewards_train/margins_1": 1.0543665885925293, "rewards_train/margins_2": 0.8268505334854126, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -254.94232177734375, "logps_train/policy_1_l": -202.774658203125, "logps_train/policy_1_w": -208.8914031982422, "logps_train/policy_2_2": -214.65817260742188, "logps_train/policy_2_w": -252.10015869140625, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 1.9271552562713623, "rewards_train/1-l": -2.1077089309692383, "rewards_train/1-w": 3.3772668838500977, "rewards_train/2-2": 3.5146751403808594, "rewards_train/2-w": 1.911077857017517, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.484975814819336, "rewards_train/margins_1": 1.4501116275787354, "rewards_train/margins_2": 1.6035972833633423, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -248.47662353515625, "logps_train/policy_1_l": -226.67245483398438, "logps_train/policy_1_w": -192.11257934570312, "logps_train/policy_2_2": -211.5598602294922, "logps_train/policy_2_w": -225.8842315673828, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.5538986921310425, "rewards_train/1-l": -1.9078702926635742, "rewards_train/1-w": 3.524679660797119, "rewards_train/2-2": 3.087764263153076, "rewards_train/2-w": 2.2397031784057617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.432549953460693, "rewards_train/margins_1": 1.9707809686660767, "rewards_train/margins_2": 0.8480610847473145, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -118.02671813964844, "logps_train/policy_1_l": -138.2164764404297, "logps_train/policy_1_w": -78.62171936035156, "logps_train/policy_2_2": -97.05651092529297, "logps_train/policy_2_w": -115.09669494628906, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.9723290205001831, "rewards_train/1-l": -1.6646167039871216, "rewards_train/1-w": 1.9612650871276855, "rewards_train/2-2": 1.6826298236846924, "rewards_train/2-w": 0.7981427907943726, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.625881791114807, "rewards_train/margins_1": 0.9889360666275024, "rewards_train/margins_2": 0.8844870328903198, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -136.1317596435547, "logps_train/policy_1_l": -222.14010620117188, "logps_train/policy_1_w": -66.12997436523438, "logps_train/policy_2_2": -114.01020050048828, "logps_train/policy_2_w": -80.92646789550781, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.6454177498817444, "rewards_train/1-l": -3.9968228340148926, "rewards_train/1-w": 1.8186432123184204, "rewards_train/2-2": 1.616949439048767, "rewards_train/2-w": 1.4221971035003662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.815466046333313, "rewards_train/margins_1": 1.173225462436676, "rewards_train/margins_2": 0.19475233554840088, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -255.26113891601562, "logps_train/policy_1_l": -177.59286499023438, "logps_train/policy_1_w": -201.18600463867188, "logps_train/policy_2_2": -210.01145935058594, "logps_train/policy_2_w": -247.97459411621094, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.433262586593628, "rewards_train/1-l": -1.863192081451416, "rewards_train/1-w": 2.372804641723633, "rewards_train/2-2": 3.303541421890259, "rewards_train/2-w": 0.5587897896766663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.235996723175049, "rewards_train/margins_1": 0.9395420551300049, "rewards_train/margins_2": 2.7447516322135925, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -218.0706787109375, "logps_train/policy_1_l": -163.40216064453125, "logps_train/policy_1_w": -130.03123474121094, "logps_train/policy_2_2": -181.8612060546875, "logps_train/policy_2_w": -168.38623046875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.224181890487671, "rewards_train/1-l": -1.6773746013641357, "rewards_train/1-w": 3.3000025749206543, "rewards_train/2-2": 2.98067569732666, "rewards_train/2-w": 1.508252501487732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.97737717628479, "rewards_train/margins_1": 2.0758206844329834, "rewards_train/margins_2": 1.4724231958389282, "step": 317 }, { "epoch": 0.95, "learning_rate": 2.917895921603958e-06, "loss": 0.6961, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -125.36886596679688, "logps_train/policy_1_l": -149.4711151123047, "logps_train/policy_1_w": -124.49365234375, "logps_train/policy_2_2": -101.79338073730469, "logps_train/policy_2_w": -167.44384765625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.2013943195343018, "rewards_train/1-l": -1.568498134613037, "rewards_train/1-w": 1.629541039466858, "rewards_train/2-2": 1.7487863302230835, "rewards_train/2-w": 0.5024908185005188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.198039174079895, "rewards_train/margins_1": 0.42814671993255615, "rewards_train/margins_2": 1.2462955117225647, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -104.3389892578125, "logps_train/policy_1_l": -108.63356018066406, "logps_train/policy_1_w": -119.65833282470703, "logps_train/policy_2_2": -80.81808471679688, "logps_train/policy_2_w": -145.77557373046875, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.6739128828048706, "rewards_train/1-l": -1.3359630107879639, "rewards_train/1-w": 2.4678092002868652, "rewards_train/2-2": 1.4666287899017334, "rewards_train/2-w": 1.7035462856292725, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.803772211074829, "rewards_train/margins_1": 1.7938963174819946, "rewards_train/margins_2": -0.23691749572753906, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -168.2877197265625, "logps_train/policy_1_l": -132.3762664794922, "logps_train/policy_1_w": -107.65274047851562, "logps_train/policy_2_2": -117.25775146484375, "logps_train/policy_2_w": -157.88272094726562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.5345090627670288, "rewards_train/1-l": -1.7370400428771973, "rewards_train/1-w": 1.7876551151275635, "rewards_train/2-2": 1.9343812465667725, "rewards_train/2-w": 0.12579011917114258, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5246951580047607, "rewards_train/margins_1": 1.2531460523605347, "rewards_train/margins_2": 1.8085911273956299, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -127.78437042236328, "logps_train/policy_1_l": -107.90428161621094, "logps_train/policy_1_w": -115.57340240478516, "logps_train/policy_2_2": -93.48966979980469, "logps_train/policy_2_w": -151.27423095703125, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.9395318627357483, "rewards_train/1-l": -0.9880843162536621, "rewards_train/1-w": 1.958284616470337, "rewards_train/2-2": 1.9857981204986572, "rewards_train/2-w": 1.0475776195526123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.946368932723999, "rewards_train/margins_1": 1.0187527537345886, "rewards_train/margins_2": 0.9382205009460449, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -136.11257934570312, "logps_train/policy_1_l": -283.4867248535156, "logps_train/policy_1_w": -172.66458129882812, "logps_train/policy_2_2": -99.38482666015625, "logps_train/policy_2_w": -232.5025177001953, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.1039769649505615, "rewards_train/1-l": -1.1771882772445679, "rewards_train/1-w": 3.1601054668426514, "rewards_train/2-2": 2.1056578159332275, "rewards_train/2-w": 1.4778739213943481, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.337293744087219, "rewards_train/margins_1": 2.05612850189209, "rewards_train/margins_2": 0.6277838945388794, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -226.38681030273438, "logps_train/policy_1_l": -328.568359375, "logps_train/policy_1_w": -149.60519409179688, "logps_train/policy_2_2": -173.20101928710938, "logps_train/policy_2_w": -201.92300415039062, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -284.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.0456947088241577, "rewards_train/1-l": -4.524022579193115, "rewards_train/1-w": 2.980105400085449, "rewards_train/2-2": 3.0392727851867676, "rewards_train/2-w": 1.2842614650726318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.5041279792785645, "rewards_train/margins_1": 1.9344106912612915, "rewards_train/margins_2": 1.7550113201141357, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -193.449951171875, "logps_train/policy_1_l": -258.9118957519531, "logps_train/policy_1_w": -177.79025268554688, "logps_train/policy_2_2": -166.697265625, "logps_train/policy_2_w": -219.83892822265625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 2.055004358291626, "rewards_train/1-l": -2.6749789714813232, "rewards_train/1-w": 3.38425612449646, "rewards_train/2-2": 2.702930212020874, "rewards_train/2-w": 1.6723575592041016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.059235095977783, "rewards_train/margins_1": 1.329251766204834, "rewards_train/margins_2": 1.0305726528167725, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -86.2771987915039, "logps_train/policy_1_l": -106.28651428222656, "logps_train/policy_1_w": -94.30945587158203, "logps_train/policy_2_2": -65.18333435058594, "logps_train/policy_2_w": -113.6331787109375, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 1.3925926685333252, "rewards_train/1-l": -1.3938853740692139, "rewards_train/1-w": 2.0088980197906494, "rewards_train/2-2": 1.8980727195739746, "rewards_train/2-w": 1.3616819381713867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4027833938598633, "rewards_train/margins_1": 0.6163053512573242, "rewards_train/margins_2": 0.5363907814025879, "step": 318 }, { "epoch": 0.96, "logps_train/policy_1_2": -261.4773254394531, "logps_train/policy_1_l": -407.9955749511719, "logps_train/policy_1_w": -203.87567138671875, "logps_train/policy_2_2": -190.7694854736328, "logps_train/policy_2_w": -262.6654052734375, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -364.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 1.1897681951522827, "rewards_train/1-l": -4.299556732177734, "rewards_train/1-w": 3.8624331951141357, "rewards_train/2-2": 2.9293012619018555, "rewards_train/2-w": 1.5209598541259766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.16198992729187, "rewards_train/margins_1": 2.672664999961853, "rewards_train/margins_2": 1.408341407775879, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -177.75692749023438, "logps_train/policy_1_l": -350.03204345703125, "logps_train/policy_1_w": -158.95396423339844, "logps_train/policy_2_2": -138.27537536621094, "logps_train/policy_2_w": -218.2246856689453, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -304.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.2778472900390625, "rewards_train/1-l": -4.462164878845215, "rewards_train/1-w": 3.3389785289764404, "rewards_train/2-2": 2.6935558319091797, "rewards_train/2-w": 0.8087819814682007, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.801143407821655, "rewards_train/margins_1": 2.061131238937378, "rewards_train/margins_2": 1.884773850440979, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -200.93191528320312, "logps_train/policy_1_l": -227.4688262939453, "logps_train/policy_1_w": -107.8238525390625, "logps_train/policy_2_2": -165.33004760742188, "logps_train/policy_2_w": -145.66514587402344, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.172432780265808, "rewards_train/1-l": -2.5039141178131104, "rewards_train/1-w": 2.6035521030426025, "rewards_train/2-2": 2.381056785583496, "rewards_train/2-w": 1.195594072341919, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.107466220855713, "rewards_train/margins_1": 1.4311193227767944, "rewards_train/margins_2": 1.1854627132415771, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -223.11822509765625, "logps_train/policy_1_l": -171.75328063964844, "logps_train/policy_1_w": -143.50430297851562, "logps_train/policy_2_2": -165.3943634033203, "logps_train/policy_2_w": -198.588134765625, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.5038025379180908, "rewards_train/1-l": -1.2003276348114014, "rewards_train/1-w": 2.902695894241333, "rewards_train/2-2": 3.463688611984253, "rewards_train/2-w": 1.5474374294281006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.103023529052734, "rewards_train/margins_1": 1.3988933563232422, "rewards_train/margins_2": 1.9162511825561523, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -120.688232421875, "logps_train/policy_1_l": -95.35369873046875, "logps_train/policy_1_w": -72.89781188964844, "logps_train/policy_2_2": -92.01927185058594, "logps_train/policy_2_w": -97.47171020507812, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 0.5905515551567078, "rewards_train/1-l": -0.8521661758422852, "rewards_train/1-w": 1.4133437871932983, "rewards_train/2-2": 1.6078383922576904, "rewards_train/2-w": 0.8508762121200562, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.2655099630355835, "rewards_train/margins_1": 0.8227922320365906, "rewards_train/margins_2": 0.7569621801376343, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -169.35977172851562, "logps_train/policy_1_l": -198.75302124023438, "logps_train/policy_1_w": -104.34912109375, "logps_train/policy_2_2": -137.75669860839844, "logps_train/policy_2_w": -131.82858276367188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.6921486854553223, "rewards_train/1-l": -2.8043055534362793, "rewards_train/1-w": 2.054931640625, "rewards_train/2-2": 2.7712059020996094, "rewards_train/2-w": 1.177297830581665, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.859237194061279, "rewards_train/margins_1": 0.36278295516967773, "rewards_train/margins_2": 1.5939080715179443, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -215.96563720703125, "logps_train/policy_1_l": -188.9884033203125, "logps_train/policy_1_w": -149.01458740234375, "logps_train/policy_2_2": -164.85128784179688, "logps_train/policy_2_w": -192.79052734375, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.3253101110458374, "rewards_train/1-l": -1.9058706760406494, "rewards_train/1-w": 3.1508846282958984, "rewards_train/2-2": 2.935575008392334, "rewards_train/2-w": 1.7709461450576782, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.056755304336548, "rewards_train/margins_1": 1.825574517250061, "rewards_train/margins_2": 1.1646288633346558, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -200.2440185546875, "logps_train/policy_1_l": -176.5542449951172, "logps_train/policy_1_w": -174.341552734375, "logps_train/policy_2_2": -165.93719482421875, "logps_train/policy_2_w": -221.82968139648438, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 0.6693493723869324, "rewards_train/1-l": -1.539799451828003, "rewards_train/1-w": 2.698655605316162, "rewards_train/2-2": 1.9672186374664307, "rewards_train/2-w": 1.263126015663147, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.238455057144165, "rewards_train/margins_1": 2.0293062329292297, "rewards_train/margins_2": 0.7040926218032837, "step": 319 }, { "epoch": 0.96, "learning_rate": 2.8935253694962414e-06, "loss": 0.6283, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -120.0096664428711, "logps_train/policy_1_l": -111.39749145507812, "logps_train/policy_1_w": -82.5558853149414, "logps_train/policy_2_2": -92.79296112060547, "logps_train/policy_2_w": -117.86639404296875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 1.1013765335083008, "rewards_train/1-l": -1.154201626777649, "rewards_train/1-w": 1.5772241353988647, "rewards_train/2-2": 2.041016101837158, "rewards_train/2-w": 0.9742984175682068, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7314257621765137, "rewards_train/margins_1": 0.47584760189056396, "rewards_train/margins_2": 1.0667176842689514, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -144.12222290039062, "logps_train/policy_1_l": -115.37882232666016, "logps_train/policy_1_w": -137.1527099609375, "logps_train/policy_2_2": -115.41498565673828, "logps_train/policy_2_w": -173.0782470703125, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.7127768993377686, "rewards_train/1-l": -1.7488200664520264, "rewards_train/1-w": 3.530041217803955, "rewards_train/2-2": 2.659282684326172, "rewards_train/2-w": 2.3453006744384766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.2788612842559814, "rewards_train/margins_1": 1.8172643184661865, "rewards_train/margins_2": 0.3139820098876953, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -189.8386993408203, "logps_train/policy_1_l": -124.69882202148438, "logps_train/policy_1_w": -146.62286376953125, "logps_train/policy_2_2": -153.24069213867188, "logps_train/policy_2_w": -192.77096557617188, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 0.8798020482063293, "rewards_train/1-l": -0.8972256779670715, "rewards_train/1-w": 2.5947446823120117, "rewards_train/2-2": 2.081789255142212, "rewards_train/2-w": 1.0477073192596436, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4919703602790833, "rewards_train/margins_1": 1.7149426341056824, "rewards_train/margins_2": 1.0340819358825684, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -121.4472885131836, "logps_train/policy_1_l": -139.4947052001953, "logps_train/policy_1_w": -101.27752685546875, "logps_train/policy_2_2": -98.61697387695312, "logps_train/policy_2_w": -128.6357421875, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1802711486816406, "rewards_train/1-l": -1.0855307579040527, "rewards_train/1-w": 1.9284969568252563, "rewards_train/2-2": 1.8414273262023926, "rewards_train/2-w": 1.2458007335662842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.014027714729309, "rewards_train/margins_1": 0.7482258081436157, "rewards_train/margins_2": 0.5956265926361084, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -215.64199829101562, "logps_train/policy_1_l": -170.04925537109375, "logps_train/policy_1_w": -174.26063537597656, "logps_train/policy_2_2": -167.87704467773438, "logps_train/policy_2_w": -227.37750244140625, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -249.0, "rewards_train/1-2": 1.9506443738937378, "rewards_train/1-l": -1.7631282806396484, "rewards_train/1-w": 4.302062034606934, "rewards_train/2-2": 3.7904207706451416, "rewards_train/2-w": 2.1372485160827637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.065190315246582, "rewards_train/margins_1": 2.351417660713196, "rewards_train/margins_2": 1.653172254562378, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -124.00654602050781, "logps_train/policy_1_l": -146.23089599609375, "logps_train/policy_1_w": -90.9397201538086, "logps_train/policy_2_2": -103.10299682617188, "logps_train/policy_2_w": -111.3466567993164, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.9903609752655029, "rewards_train/1-l": -1.8689875602722168, "rewards_train/1-w": 2.1364967823028564, "rewards_train/2-2": 1.6963412761688232, "rewards_train/2-w": 1.5012717247009277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.005484342575073, "rewards_train/margins_1": 1.1461358070373535, "rewards_train/margins_2": 0.1950695514678955, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -164.6905517578125, "logps_train/policy_1_l": -145.82742309570312, "logps_train/policy_1_w": -133.41427612304688, "logps_train/policy_2_2": -111.17925262451172, "logps_train/policy_2_w": -185.60281372070312, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.4159289002418518, "rewards_train/1-l": -1.0151643753051758, "rewards_train/1-w": 2.3835721015930176, "rewards_train/2-2": 1.4523875713348389, "rewards_train/2-w": 0.7522180080413818, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3987364768981934, "rewards_train/margins_1": 2.7995010018348694, "rewards_train/margins_2": 0.700169563293457, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -119.86979675292969, "logps_train/policy_1_l": -140.42251586914062, "logps_train/policy_1_w": -87.55366516113281, "logps_train/policy_2_2": -99.89446258544922, "logps_train/policy_2_w": -108.87847900390625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -102.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 1.359114170074463, "rewards_train/1-l": -2.131315231323242, "rewards_train/1-w": 1.5258839130401611, "rewards_train/2-2": 1.909772276878357, "rewards_train/2-w": 0.7773869037628174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6571991443634033, "rewards_train/margins_1": 0.16676974296569824, "rewards_train/margins_2": 1.1323853731155396, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -113.07439422607422, "logps_train/policy_1_l": -137.70648193359375, "logps_train/policy_1_w": -68.51370239257812, "logps_train/policy_2_2": -93.4208755493164, "logps_train/policy_2_w": -87.35009765625, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.1613103151321411, "rewards_train/1-l": -1.704973816871643, "rewards_train/1-w": 2.0009734630584717, "rewards_train/2-2": 1.957912564277649, "rewards_train/2-w": 1.3149902820587158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7059472799301147, "rewards_train/margins_1": 0.8396631479263306, "rewards_train/margins_2": 0.6429222822189331, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -219.5948486328125, "logps_train/policy_1_l": -229.5576934814453, "logps_train/policy_1_w": -146.39364624023438, "logps_train/policy_2_2": -179.78726196289062, "logps_train/policy_2_w": -186.40328979492188, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.5467641353607178, "rewards_train/1-l": -2.760652542114258, "rewards_train/1-w": 2.712197780609131, "rewards_train/2-2": 3.1931490898132324, "rewards_train/2-w": 1.2081069946289062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.472850322723389, "rewards_train/margins_1": 1.165433645248413, "rewards_train/margins_2": 1.9850420951843262, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -147.60450744628906, "logps_train/policy_1_l": -124.74798583984375, "logps_train/policy_1_w": -129.01486206054688, "logps_train/policy_2_2": -125.10018920898438, "logps_train/policy_2_w": -157.64239501953125, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.225487470626831, "rewards_train/1-l": -1.606829047203064, "rewards_train/1-w": 2.444411277770996, "rewards_train/2-2": 2.08451247215271, "rewards_train/2-w": 1.1722838878631592, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.05124032497406, "rewards_train/margins_1": 1.218923807144165, "rewards_train/margins_2": 0.9122285842895508, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -197.3831787109375, "logps_train/policy_1_l": -172.47117614746094, "logps_train/policy_1_w": -115.68650817871094, "logps_train/policy_2_2": -150.94900512695312, "logps_train/policy_2_w": -163.1844024658203, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.6730111241340637, "rewards_train/1-l": -2.276805877685547, "rewards_train/1-w": 2.8000996112823486, "rewards_train/2-2": 2.6570515632629395, "rewards_train/2-w": 1.298747181892395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.0769054889678955, "rewards_train/margins_1": 2.127088487148285, "rewards_train/margins_2": 1.3583043813705444, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -136.31954956054688, "logps_train/policy_1_l": -146.81553649902344, "logps_train/policy_1_w": -121.99884033203125, "logps_train/policy_2_2": -106.70318603515625, "logps_train/policy_2_w": -165.17742919921875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.9848905801773071, "rewards_train/1-l": -1.9662702083587646, "rewards_train/1-w": 2.7532405853271484, "rewards_train/2-2": 2.144451379776001, "rewards_train/2-w": 1.4291315078735352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.719510793685913, "rewards_train/margins_1": 1.7683500051498413, "rewards_train/margins_2": 0.7153198719024658, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -84.20989990234375, "logps_train/policy_1_l": -145.4775848388672, "logps_train/policy_1_w": -81.31404113769531, "logps_train/policy_2_2": -60.41370391845703, "logps_train/policy_2_w": -110.79324340820312, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.8184629082679749, "rewards_train/1-l": -2.094486713409424, "rewards_train/1-w": 1.497502088546753, "rewards_train/2-2": 1.5355827808380127, "rewards_train/2-w": 0.9226285219192505, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.5919888019561768, "rewards_train/margins_1": 0.6790391802787781, "rewards_train/margins_2": 0.6129542589187622, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -69.28556823730469, "logps_train/policy_1_l": -75.86761474609375, "logps_train/policy_1_w": -78.51023864746094, "logps_train/policy_2_2": -53.01811599731445, "logps_train/policy_2_w": -100.5479965209961, "logps_train/ref_1_2": -78.5, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 0.9222250580787659, "rewards_train/1-l": -0.8479427695274353, "rewards_train/1-w": 1.766554355621338, "rewards_train/2-2": 1.5065866708755493, "rewards_train/2-w": 0.7639504671096802, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.614497125148773, "rewards_train/margins_1": 0.844329297542572, "rewards_train/margins_2": 0.7426362037658691, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -169.88877868652344, "logps_train/policy_1_l": -158.38528442382812, "logps_train/policy_1_w": -116.91758728027344, "logps_train/policy_2_2": -126.35502624511719, "logps_train/policy_2_w": -158.24508666992188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5861225128173828, "rewards_train/1-l": -1.7479028701782227, "rewards_train/1-w": 1.9988658428192139, "rewards_train/2-2": 3.0957469940185547, "rewards_train/2-w": 1.0004916191101074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7467687129974365, "rewards_train/margins_1": 0.41274333000183105, "rewards_train/margins_2": 2.0952553749084473, "step": 321 }, { "epoch": 0.96, "learning_rate": 2.8691164100062035e-06, "loss": 0.6694, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -136.37184143066406, "logps_train/policy_1_l": -106.52921295166016, "logps_train/policy_1_w": -100.65316009521484, "logps_train/policy_2_2": -109.47693634033203, "logps_train/policy_2_w": -129.3160400390625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9323465824127197, "rewards_train/1-l": -0.8654209971427917, "rewards_train/1-w": 2.5255532264709473, "rewards_train/2-2": 1.4526972770690918, "rewards_train/2-w": 1.4296742677688599, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.390974223613739, "rewards_train/margins_1": 1.5932066440582275, "rewards_train/margins_2": 0.023023009300231934, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -202.84996032714844, "logps_train/policy_1_l": -201.03448486328125, "logps_train/policy_1_w": -203.67665100097656, "logps_train/policy_2_2": -171.1967010498047, "logps_train/policy_2_w": -234.62728881835938, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 1.7181291580200195, "rewards_train/1-l": -2.3573548793792725, "rewards_train/1-w": 2.907334327697754, "rewards_train/2-2": 3.1178300380706787, "rewards_train/2-w": 1.8497698307037354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.264689207077026, "rewards_train/margins_1": 1.1892051696777344, "rewards_train/margins_2": 1.2680602073669434, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -177.7313232421875, "logps_train/policy_1_l": -139.004150390625, "logps_train/policy_1_w": -123.03752136230469, "logps_train/policy_2_2": -132.15664672851562, "logps_train/policy_2_w": -164.98971557617188, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.9874157905578613, "rewards_train/1-l": -1.1301026344299316, "rewards_train/1-w": 2.8243722915649414, "rewards_train/2-2": 2.511289119720459, "rewards_train/2-w": 1.3197789192199707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.954474925994873, "rewards_train/margins_1": 1.83695650100708, "rewards_train/margins_2": 1.1915102005004883, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -264.9002380371094, "logps_train/policy_1_l": -221.25656127929688, "logps_train/policy_1_w": -194.87832641601562, "logps_train/policy_2_2": -231.73849487304688, "logps_train/policy_2_w": -239.0078125, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.628725290298462, "rewards_train/1-l": -1.8131577968597412, "rewards_train/1-w": 2.57466721534729, "rewards_train/2-2": 2.9699010848999023, "rewards_train/2-w": 1.4242186546325684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.387825012207031, "rewards_train/margins_1": 0.9459419250488281, "rewards_train/margins_2": 1.545682430267334, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -121.09454345703125, "logps_train/policy_1_l": -139.28854370117188, "logps_train/policy_1_w": -85.35206604003906, "logps_train/policy_2_2": -90.01750183105469, "logps_train/policy_2_w": -116.32560729980469, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.5342960357666016, "rewards_train/1-l": -2.6178197860717773, "rewards_train/1-w": 2.6835439205169678, "rewards_train/2-2": 2.2472734451293945, "rewards_train/2-w": 1.7611899375915527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.301363706588745, "rewards_train/margins_1": 1.1492478847503662, "rewards_train/margins_2": 0.4860835075378418, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -137.77508544921875, "logps_train/policy_1_l": -137.3827362060547, "logps_train/policy_1_w": -61.395774841308594, "logps_train/policy_2_2": -108.301513671875, "logps_train/policy_2_w": -86.62088012695312, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 0.5197567939758301, "rewards_train/1-l": -1.439836025238037, "rewards_train/1-w": 1.2950900793075562, "rewards_train/2-2": 1.7534420490264893, "rewards_train/2-w": 0.7054898142814636, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7349261045455933, "rewards_train/margins_1": 0.7753332853317261, "rewards_train/margins_2": 1.0479522347450256, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -103.87237548828125, "logps_train/policy_1_l": -124.39410400390625, "logps_train/policy_1_w": -79.13792419433594, "logps_train/policy_2_2": -80.84175109863281, "logps_train/policy_2_w": -99.6301040649414, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 1.51432466506958, "rewards_train/1-l": -1.9311089515686035, "rewards_train/1-w": 2.501832962036133, "rewards_train/2-2": 2.2384817600250244, "rewards_train/2-w": 1.5901145935058594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.432941913604736, "rewards_train/margins_1": 0.9875082969665527, "rewards_train/margins_2": 0.648367166519165, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -148.41461181640625, "logps_train/policy_1_l": -153.31112670898438, "logps_train/policy_1_w": -134.56378173828125, "logps_train/policy_2_2": -132.17967224121094, "logps_train/policy_2_w": -158.0055389404297, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.6757265329360962, "rewards_train/1-l": -1.5123636722564697, "rewards_train/1-w": 2.2850279808044434, "rewards_train/2-2": 2.137502431869507, "rewards_train/2-w": 1.507258415222168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.797391653060913, "rewards_train/margins_1": 0.6093014478683472, "rewards_train/margins_2": 0.6302440166473389, "step": 322 }, { "epoch": 0.97, "logps_train/policy_1_2": -75.11412048339844, "logps_train/policy_1_l": -113.70387268066406, "logps_train/policy_1_w": -137.63502502441406, "logps_train/policy_2_2": -59.65764617919922, "logps_train/policy_2_w": -183.27178955078125, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.5147600173950195, "rewards_train/1-l": -1.6064218282699585, "rewards_train/1-w": 2.5794663429260254, "rewards_train/2-2": 1.7549388408660889, "rewards_train/2-w": 0.6423529386520386, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.185888171195984, "rewards_train/margins_1": 1.0647063255310059, "rewards_train/margins_2": 1.1125859022140503, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -119.91098022460938, "logps_train/policy_1_l": -74.62442016601562, "logps_train/policy_1_w": -115.2288818359375, "logps_train/policy_2_2": -102.08061218261719, "logps_train/policy_2_w": -137.7635955810547, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.5049953460693359, "rewards_train/1-l": -0.44244685769081116, "rewards_train/1-w": 1.7107057571411133, "rewards_train/2-2": 1.0290473699569702, "rewards_train/2-w": 0.7599686980247498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.1531526148319244, "rewards_train/margins_1": 1.2057104110717773, "rewards_train/margins_2": 0.26907867193222046, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -178.72293090820312, "logps_train/policy_1_l": -187.58116149902344, "logps_train/policy_1_w": -96.47404479980469, "logps_train/policy_2_2": -149.47378540039062, "logps_train/policy_2_w": -126.59786987304688, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.4683316946029663, "rewards_train/1-l": -1.2167110443115234, "rewards_train/1-w": 2.512751579284668, "rewards_train/2-2": 2.465121269226074, "rewards_train/2-w": 2.0589635372161865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7294626235961914, "rewards_train/margins_1": 1.0444198846817017, "rewards_train/margins_2": 0.4061577320098877, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -224.1000213623047, "logps_train/policy_1_l": -252.29730224609375, "logps_train/policy_1_w": -246.56024169921875, "logps_train/policy_2_2": -183.6002197265625, "logps_train/policy_2_w": -298.4075927734375, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -312.0, "rewards_train/1-2": 1.3857009410858154, "rewards_train/1-l": -2.3021903038024902, "rewards_train/1-w": 3.1465156078338623, "rewards_train/2-2": 2.68450927734375, "rewards_train/2-w": 1.3527977466583252, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4487059116363525, "rewards_train/margins_1": 1.7608146667480469, "rewards_train/margins_2": 1.3317115306854248, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -175.3756103515625, "logps_train/policy_1_l": -172.1536865234375, "logps_train/policy_1_w": -59.59977722167969, "logps_train/policy_2_2": -133.79638671875, "logps_train/policy_2_w": -72.45032501220703, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 0.5327516794204712, "rewards_train/1-l": -2.2431037425994873, "rewards_train/1-w": 2.1138508319854736, "rewards_train/2-2": 2.0195798873901367, "rewards_train/2-w": 1.7088741064071655, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.356954574584961, "rewards_train/margins_1": 1.5810991525650024, "rewards_train/margins_2": 0.3107057809829712, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -154.8494873046875, "logps_train/policy_1_l": -173.4779510498047, "logps_train/policy_1_w": -139.74789428710938, "logps_train/policy_2_2": -119.29216003417969, "logps_train/policy_2_w": -178.35858154296875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.8517714142799377, "rewards_train/1-l": -1.2470142841339111, "rewards_train/1-w": 2.286147117614746, "rewards_train/2-2": 1.9104328155517578, "rewards_train/2-w": 0.8125802278518677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5331614017486572, "rewards_train/margins_1": 1.4343757033348083, "rewards_train/margins_2": 1.0978525876998901, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -222.59886169433594, "logps_train/policy_1_l": -223.38320922851562, "logps_train/policy_1_w": -135.8814697265625, "logps_train/policy_2_2": -185.37013244628906, "logps_train/policy_2_w": -169.48831176757812, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.6283953189849854, "rewards_train/1-l": -2.469571352005005, "rewards_train/1-w": 2.730602741241455, "rewards_train/2-2": 2.743455410003662, "rewards_train/2-w": 2.0761680603027344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.20017409324646, "rewards_train/margins_1": 1.1022074222564697, "rewards_train/margins_2": 0.6672873497009277, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -85.36868286132812, "logps_train/policy_1_l": -135.84332275390625, "logps_train/policy_1_w": -67.98719787597656, "logps_train/policy_2_2": -72.5146484375, "logps_train/policy_2_w": -91.08625793457031, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.6573208570480347, "rewards_train/1-l": -1.5563297271728516, "rewards_train/1-w": 1.5795024633407593, "rewards_train/2-2": 0.9803714156150818, "rewards_train/2-w": 0.9372732043266296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.135832190513611, "rewards_train/margins_1": 0.9221816062927246, "rewards_train/margins_2": 0.04309821128845215, "step": 323 }, { "epoch": 0.97, "learning_rate": 2.8446714254052617e-06, "loss": 0.6846, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -125.09342956542969, "logps_train/policy_1_l": -130.75289916992188, "logps_train/policy_1_w": -110.4415512084961, "logps_train/policy_2_2": -94.31432342529297, "logps_train/policy_2_w": -143.1198272705078, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.9672189950942993, "rewards_train/1-l": -1.7481422424316406, "rewards_train/1-w": 1.9847509860992432, "rewards_train/2-2": 1.8966923952102661, "rewards_train/2-w": 1.206767201423645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.732893228530884, "rewards_train/margins_1": 1.0175319910049438, "rewards_train/margins_2": 0.6899251937866211, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -190.374267578125, "logps_train/policy_1_l": -144.9600830078125, "logps_train/policy_1_w": -140.83335876464844, "logps_train/policy_2_2": -155.7225341796875, "logps_train/policy_2_w": -165.53860473632812, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.3781991004943848, "rewards_train/1-l": -1.1932735443115234, "rewards_train/1-w": 3.326819896697998, "rewards_train/2-2": 2.4582133293151855, "rewards_train/2-w": 2.1727025508880615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.5200934410095215, "rewards_train/margins_1": 1.9486207962036133, "rewards_train/margins_2": 0.285510778427124, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -130.12767028808594, "logps_train/policy_1_l": -135.7981719970703, "logps_train/policy_1_w": -108.56361389160156, "logps_train/policy_2_2": -107.06046295166016, "logps_train/policy_2_w": -130.54010009765625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.3075459003448486, "rewards_train/1-l": -0.8110183477401733, "rewards_train/1-w": 1.9049668312072754, "rewards_train/2-2": 1.818953275680542, "rewards_train/2-w": 1.0143498182296753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7159851789474487, "rewards_train/margins_1": 0.5974209308624268, "rewards_train/margins_2": 0.8046034574508667, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -136.71084594726562, "logps_train/policy_1_l": -191.02081298828125, "logps_train/policy_1_w": -95.55542755126953, "logps_train/policy_2_2": -104.2313232421875, "logps_train/policy_2_w": -125.52716064453125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.1273536682128906, "rewards_train/1-l": -2.125030279159546, "rewards_train/1-w": 1.9620351791381836, "rewards_train/2-2": 2.0952281951904297, "rewards_train/2-w": 0.905877411365509, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.0870654582977295, "rewards_train/margins_1": 0.834681510925293, "rewards_train/margins_2": 1.1893507838249207, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -96.48054504394531, "logps_train/policy_1_l": -86.88932800292969, "logps_train/policy_1_w": -79.38595581054688, "logps_train/policy_2_2": -74.66593933105469, "logps_train/policy_2_w": -109.13530731201172, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.2356363534927368, "rewards_train/1-l": -0.6581224799156189, "rewards_train/1-w": 2.102029323577881, "rewards_train/2-2": 1.6836020946502686, "rewards_train/2-w": 1.3083438873291016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7601518034934998, "rewards_train/margins_1": 0.866392970085144, "rewards_train/margins_2": 0.375258207321167, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -191.16085815429688, "logps_train/policy_1_l": -191.4849090576172, "logps_train/policy_1_w": -136.1629180908203, "logps_train/policy_2_2": -151.5579376220703, "logps_train/policy_2_w": -170.18153381347656, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.1557905673980713, "rewards_train/1-l": -2.0863823890686035, "rewards_train/1-w": 2.437809467315674, "rewards_train/2-2": 2.764519214630127, "rewards_train/2-w": 1.443955421447754, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.524191856384277, "rewards_train/margins_1": 1.2820188999176025, "rewards_train/margins_2": 1.320563793182373, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -125.8237075805664, "logps_train/policy_1_l": -151.47821044921875, "logps_train/policy_1_w": -117.36027526855469, "logps_train/policy_2_2": -97.41708374023438, "logps_train/policy_2_w": -151.13897705078125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.3426294326782227, "rewards_train/1-l": -2.407390594482422, "rewards_train/1-w": 1.903425693511963, "rewards_train/2-2": 2.423135757446289, "rewards_train/2-w": 1.1548519134521484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.310816287994385, "rewards_train/margins_1": 0.5607962608337402, "rewards_train/margins_2": 1.2682838439941406, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -61.373085021972656, "logps_train/policy_1_l": -79.85494995117188, "logps_train/policy_1_w": -66.04286193847656, "logps_train/policy_2_2": -49.0341682434082, "logps_train/policy_2_w": -73.69853210449219, "logps_train/ref_1_2": -72.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -61.75, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.041597604751587, "rewards_train/1-l": -0.9868619441986084, "rewards_train/1-w": 1.5543076992034912, "rewards_train/2-2": 1.2641613483428955, "rewards_train/2-w": 1.1426469087600708, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.5411696434020996, "rewards_train/margins_1": 0.5127100944519043, "rewards_train/margins_2": 0.12151443958282471, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -104.48303985595703, "logps_train/policy_1_l": -151.09957885742188, "logps_train/policy_1_w": -93.85971069335938, "logps_train/policy_2_2": -78.57813262939453, "logps_train/policy_2_w": -124.51913452148438, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.9532590508460999, "rewards_train/1-l": -1.9099576473236084, "rewards_train/1-w": 3.445669174194336, "rewards_train/2-2": 1.8640615940093994, "rewards_train/2-w": 2.147305488586426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.355626821517944, "rewards_train/margins_1": 2.492410123348236, "rewards_train/margins_2": -0.28324389457702637, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -210.12997436523438, "logps_train/policy_1_l": -191.46572875976562, "logps_train/policy_1_w": -152.3395538330078, "logps_train/policy_2_2": -169.95758056640625, "logps_train/policy_2_w": -188.96578979492188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 2.1713786125183105, "rewards_train/1-l": -1.8692289590835571, "rewards_train/1-w": 2.085576295852661, "rewards_train/2-2": 3.804241418838501, "rewards_train/2-w": 1.0221710205078125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9548052549362183, "rewards_train/margins_1": -0.08580231666564941, "rewards_train/margins_2": 2.7820703983306885, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -174.47787475585938, "logps_train/policy_1_l": -177.77896118164062, "logps_train/policy_1_w": -118.85987091064453, "logps_train/policy_2_2": -134.70640563964844, "logps_train/policy_2_w": -162.06430053710938, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3596333265304565, "rewards_train/1-l": -1.128872036933899, "rewards_train/1-w": 1.8335437774658203, "rewards_train/2-2": 2.6754531860351562, "rewards_train/2-w": 0.577944278717041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9624158143997192, "rewards_train/margins_1": 0.47391045093536377, "rewards_train/margins_2": 2.0975089073181152, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -139.8328857421875, "logps_train/policy_1_l": -69.00377655029297, "logps_train/policy_1_w": -74.47187805175781, "logps_train/policy_2_2": -110.8604965209961, "logps_train/policy_2_w": -107.27719116210938, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -58.25, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.2760872840881348, "rewards_train/1-l": -1.0650259256362915, "rewards_train/1-w": 1.9285935163497925, "rewards_train/2-2": 2.2764501571655273, "rewards_train/2-w": 0.7142724990844727, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.993619441986084, "rewards_train/margins_1": 0.6525062322616577, "rewards_train/margins_2": 1.5621776580810547, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -159.59765625, "logps_train/policy_1_l": -198.9862060546875, "logps_train/policy_1_w": -138.21133422851562, "logps_train/policy_2_2": -123.14173126220703, "logps_train/policy_2_w": -183.89169311523438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.3946292400360107, "rewards_train/1-l": -2.450866937637329, "rewards_train/1-w": 2.470273494720459, "rewards_train/2-2": 2.62410831451416, "rewards_train/2-w": 0.6092691421508789, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.921140432357788, "rewards_train/margins_1": 1.0756442546844482, "rewards_train/margins_2": 2.0148391723632812, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -129.91998291015625, "logps_train/policy_1_l": -155.432861328125, "logps_train/policy_1_w": -169.45901489257812, "logps_train/policy_2_2": -103.05494689941406, "logps_train/policy_2_w": -209.28883361816406, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.3841737508773804, "rewards_train/1-l": -0.8885980844497681, "rewards_train/1-w": 2.8928678035736084, "rewards_train/2-2": 2.109739065170288, "rewards_train/2-w": 1.2750227451324463, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7814658880233765, "rewards_train/margins_1": 1.508694052696228, "rewards_train/margins_2": 0.8347163200378418, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -128.6291961669922, "logps_train/policy_1_l": -175.6566925048828, "logps_train/policy_1_w": -62.026336669921875, "logps_train/policy_2_2": -96.57484436035156, "logps_train/policy_2_w": -92.78108978271484, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 0.6683312058448792, "rewards_train/1-l": -2.414252519607544, "rewards_train/1-w": 1.6641631126403809, "rewards_train/2-2": 1.7792341709136963, "rewards_train/2-w": 1.1336097717285156, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.078415632247925, "rewards_train/margins_1": 0.9958319067955017, "rewards_train/margins_2": 0.6456243991851807, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -125.96377563476562, "logps_train/policy_1_l": -137.60330200195312, "logps_train/policy_1_w": -83.91419219970703, "logps_train/policy_2_2": -92.14004516601562, "logps_train/policy_2_w": -118.2698745727539, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.0821373462677002, "rewards_train/1-l": -2.248220920562744, "rewards_train/1-w": 2.0050649642944336, "rewards_train/2-2": 2.0980074405670166, "rewards_train/2-w": 0.710512638092041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.253285884857178, "rewards_train/margins_1": 0.9229276180267334, "rewards_train/margins_2": 1.3874948024749756, "step": 325 }, { "epoch": 0.98, "learning_rate": 2.820192801480817e-06, "loss": 0.6919, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -142.76324462890625, "logps_train/policy_1_l": -113.96344757080078, "logps_train/policy_1_w": -64.06038665771484, "logps_train/policy_2_2": -109.60104370117188, "logps_train/policy_2_w": -98.77710723876953, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.27680009603500366, "rewards_train/1-l": -1.4209542274475098, "rewards_train/1-w": 1.6328285932540894, "rewards_train/2-2": 1.3867706060409546, "rewards_train/2-w": 0.4246329069137573, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.053782820701599, "rewards_train/margins_1": 1.3560284972190857, "rewards_train/margins_2": 0.9621376991271973, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -116.19007873535156, "logps_train/policy_1_l": -183.34703063964844, "logps_train/policy_1_w": -93.74940490722656, "logps_train/policy_2_2": -89.35665893554688, "logps_train/policy_2_w": -129.217041015625, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.6919294595718384, "rewards_train/1-l": -2.80599308013916, "rewards_train/1-w": 3.1117782592773438, "rewards_train/2-2": 1.5354278087615967, "rewards_train/2-w": 2.139233350753784, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.917771339416504, "rewards_train/margins_1": 2.4198487997055054, "rewards_train/margins_2": -0.6038055419921875, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -110.62556457519531, "logps_train/policy_1_l": -169.33590698242188, "logps_train/policy_1_w": -142.2185516357422, "logps_train/policy_2_2": -88.595947265625, "logps_train/policy_2_w": -170.90902709960938, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0564618110656738, "rewards_train/1-l": -1.5992164611816406, "rewards_train/1-w": 2.328925848007202, "rewards_train/2-2": 1.8038572072982788, "rewards_train/2-w": 1.1286289691925049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9281423091888428, "rewards_train/margins_1": 1.2724640369415283, "rewards_train/margins_2": 0.6752282381057739, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -81.1606216430664, "logps_train/policy_1_l": -61.1424674987793, "logps_train/policy_1_w": -89.65357971191406, "logps_train/policy_2_2": -69.38916015625, "logps_train/policy_2_w": -102.93089294433594, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -53.5, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.4355007410049438, "rewards_train/1-l": -0.7660044431686401, "rewards_train/1-w": 1.7868878841400146, "rewards_train/2-2": 1.8243646621704102, "rewards_train/2-w": 1.406519889831543, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.552892327308655, "rewards_train/margins_1": 0.3513871431350708, "rewards_train/margins_2": 0.4178447723388672, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -105.18440246582031, "logps_train/policy_1_l": -58.22288513183594, "logps_train/policy_1_w": -74.97953796386719, "logps_train/policy_2_2": -89.66241455078125, "logps_train/policy_2_w": -99.71349334716797, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -51.75, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.8991382122039795, "rewards_train/1-l": -0.6551008224487305, "rewards_train/1-w": 1.8137645721435547, "rewards_train/2-2": 1.47536039352417, "rewards_train/2-w": 1.0661505460739136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.468865394592285, "rewards_train/margins_1": 0.9146263599395752, "rewards_train/margins_2": 0.40920984745025635, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -135.5609588623047, "logps_train/policy_1_l": -176.89553833007812, "logps_train/policy_1_w": -134.52450561523438, "logps_train/policy_2_2": -107.52438354492188, "logps_train/policy_2_w": -175.36935424804688, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.0165612697601318, "rewards_train/1-l": -2.5684609413146973, "rewards_train/1-w": 2.576456069946289, "rewards_train/2-2": 1.7172887325286865, "rewards_train/2-w": 1.1997843980789185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.144917011260986, "rewards_train/margins_1": 1.5598948001861572, "rewards_train/margins_2": 0.5175043344497681, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -159.15765380859375, "logps_train/policy_1_l": -184.93551635742188, "logps_train/policy_1_w": -138.08956909179688, "logps_train/policy_2_2": -109.46711730957031, "logps_train/policy_2_w": -204.45504760742188, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.5076727867126465, "rewards_train/1-l": -2.157613515853882, "rewards_train/1-w": 3.4402620792388916, "rewards_train/2-2": 2.3595380783081055, "rewards_train/2-w": 1.0736358165740967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.597875595092773, "rewards_train/margins_1": 2.932589292526245, "rewards_train/margins_2": 1.2859022617340088, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -57.924163818359375, "logps_train/policy_1_l": -28.28272247314453, "logps_train/policy_1_w": -62.174888610839844, "logps_train/policy_2_2": -46.479400634765625, "logps_train/policy_2_w": -77.20828247070312, "logps_train/ref_1_2": -65.5, "logps_train/ref_1_l": -21.0, "logps_train/ref_1_w": -72.5, "logps_train/ref_2_2": -57.0, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": 0.7392243146896362, "rewards_train/1-l": -0.7263190746307373, "rewards_train/1-w": 1.0582923889160156, "rewards_train/2-2": 1.0454679727554321, "rewards_train/2-w": 0.34557846188545227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.784611463546753, "rewards_train/margins_1": 0.3190680742263794, "rewards_train/margins_2": 0.6998895108699799, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -163.17788696289062, "logps_train/policy_1_l": -247.041259765625, "logps_train/policy_1_w": -112.22468566894531, "logps_train/policy_2_2": -130.53564453125, "logps_train/policy_2_w": -157.93087768554688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.3462741374969482, "rewards_train/1-l": -3.800415515899658, "rewards_train/1-w": 2.838078498840332, "rewards_train/2-2": 1.9198739528656006, "rewards_train/2-w": 0.5350377559661865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.63849401473999, "rewards_train/margins_1": 1.4918043613433838, "rewards_train/margins_2": 1.384836196899414, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -166.3578338623047, "logps_train/policy_1_l": -136.4581298828125, "logps_train/policy_1_w": -142.79464721679688, "logps_train/policy_2_2": -133.5721893310547, "logps_train/policy_2_w": -181.95779418945312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2142161130905151, "rewards_train/1-l": -1.1165149211883545, "rewards_train/1-w": 2.5150656700134277, "rewards_train/2-2": 2.678328275680542, "rewards_train/2-w": 1.0995333194732666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6315805912017822, "rewards_train/margins_1": 1.3008495569229126, "rewards_train/margins_2": 1.5787949562072754, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -147.7530975341797, "logps_train/policy_1_l": -184.40762329101562, "logps_train/policy_1_w": -116.64848327636719, "logps_train/policy_2_2": -116.99320983886719, "logps_train/policy_2_w": -144.70687866210938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.0903147459030151, "rewards_train/1-l": -1.795840859413147, "rewards_train/1-w": 2.3499948978424072, "rewards_train/2-2": 2.160053014755249, "rewards_train/2-w": 1.5089995861053467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.145835757255554, "rewards_train/margins_1": 1.259680151939392, "rewards_train/margins_2": 0.6510534286499023, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -156.13217163085938, "logps_train/policy_1_l": -167.84019470214844, "logps_train/policy_1_w": -97.89411926269531, "logps_train/policy_2_2": -137.94664001464844, "logps_train/policy_2_w": -114.85797119140625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.3797523975372314, "rewards_train/1-l": -2.5652694702148438, "rewards_train/1-w": 2.5055103302001953, "rewards_train/2-2": 2.002992630004883, "rewards_train/2-w": 2.1450624465942383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.070779800415039, "rewards_train/margins_1": 1.1257579326629639, "rewards_train/margins_2": -0.14206981658935547, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -124.42717742919922, "logps_train/policy_1_l": -116.62263488769531, "logps_train/policy_1_w": -129.08961486816406, "logps_train/policy_2_2": -100.38685607910156, "logps_train/policy_2_w": -176.23739624023438, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.4275946617126465, "rewards_train/1-l": -0.5308191180229187, "rewards_train/1-w": 1.6676019430160522, "rewards_train/2-2": 1.9113138914108276, "rewards_train/2-w": -0.06592682003974915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.25, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.198421061038971, "rewards_train/margins_1": 0.24000728130340576, "rewards_train/margins_2": 1.9772407114505768, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -84.18844604492188, "logps_train/policy_1_l": -102.32870483398438, "logps_train/policy_1_w": -73.0197982788086, "logps_train/policy_2_2": -62.90914535522461, "logps_train/policy_2_w": -82.56892395019531, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.7465850114822388, "rewards_train/1-l": -0.9704678058624268, "rewards_train/1-w": 1.2948952913284302, "rewards_train/2-2": 1.1172887086868286, "rewards_train/2-w": 1.0610766410827637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.265363097190857, "rewards_train/margins_1": 0.5483102798461914, "rewards_train/margins_2": 0.05621206760406494, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -149.77052307128906, "logps_train/policy_1_l": -172.36805725097656, "logps_train/policy_1_w": -127.49359130859375, "logps_train/policy_2_2": -121.42990112304688, "logps_train/policy_2_w": -163.7178497314453, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.5674786567687988, "rewards_train/1-l": -1.7430553436279297, "rewards_train/1-w": 1.70064115524292, "rewards_train/2-2": 2.5171666145324707, "rewards_train/2-w": 0.3782151937484741, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4436964988708496, "rewards_train/margins_1": 0.1331624984741211, "rewards_train/margins_2": 2.1389514207839966, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -238.13035583496094, "logps_train/policy_1_l": -224.74761962890625, "logps_train/policy_1_w": -119.39523315429688, "logps_train/policy_2_2": -197.30990600585938, "logps_train/policy_2_w": -157.5107421875, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.3684091567993164, "rewards_train/1-l": -2.4885120391845703, "rewards_train/1-w": 3.413914442062378, "rewards_train/2-2": 3.1154942512512207, "rewards_train/2-w": 1.9245994091033936, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.902426481246948, "rewards_train/margins_1": 2.0455052852630615, "rewards_train/margins_2": 1.1908948421478271, "step": 327 }, { "epoch": 0.98, "learning_rate": 2.7956829273034146e-06, "loss": 0.7576, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -115.8546371459961, "logps_train/policy_1_l": -128.503662109375, "logps_train/policy_1_w": -166.78915405273438, "logps_train/policy_2_2": -92.67963409423828, "logps_train/policy_2_w": -216.73501586914062, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.20945864915847778, "rewards_train/1-l": -0.8823980689048767, "rewards_train/1-w": 2.1992087364196777, "rewards_train/2-2": 0.8078180551528931, "rewards_train/2-w": 0.7374362349510193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.0816068053245544, "rewards_train/margins_1": 1.9897500872612, "rewards_train/margins_2": 0.07038182020187378, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -127.48109436035156, "logps_train/policy_1_l": -108.86539459228516, "logps_train/policy_1_w": -72.656494140625, "logps_train/policy_2_2": -92.34162902832031, "logps_train/policy_2_w": -97.45829772949219, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 1.342905044555664, "rewards_train/1-l": -1.2893714904785156, "rewards_train/1-w": 2.512866497039795, "rewards_train/2-2": 2.1600756645202637, "rewards_train/2-w": 1.7374707460403442, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8022379875183105, "rewards_train/margins_1": 1.1699614524841309, "rewards_train/margins_2": 0.42260491847991943, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -180.5089569091797, "logps_train/policy_1_l": -208.0922088623047, "logps_train/policy_1_w": -144.60264587402344, "logps_train/policy_2_2": -136.3387908935547, "logps_train/policy_2_w": -219.15321350097656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 0.9846514463424683, "rewards_train/1-l": -1.702971339225769, "rewards_train/1-w": 3.589735269546509, "rewards_train/2-2": 2.7223715782165527, "rewards_train/2-w": 1.312804102897644, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.292706608772278, "rewards_train/margins_1": 2.6050838232040405, "rewards_train/margins_2": 1.4095674753189087, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -162.77243041992188, "logps_train/policy_1_l": -159.76492309570312, "logps_train/policy_1_w": -132.93272399902344, "logps_train/policy_2_2": -122.2222900390625, "logps_train/policy_2_w": -183.123046875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.9852575659751892, "rewards_train/1-l": -1.8105988502502441, "rewards_train/1-w": 3.3192272186279297, "rewards_train/2-2": 2.1387085914611816, "rewards_train/2-w": 1.4564452171325684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.129826068878174, "rewards_train/margins_1": 2.3339696526527405, "rewards_train/margins_2": 0.6822633743286133, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -142.24716186523438, "logps_train/policy_1_l": -196.6624298095703, "logps_train/policy_1_w": -193.94488525390625, "logps_train/policy_2_2": -115.80834197998047, "logps_train/policy_2_w": -242.59255981445312, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.3487213850021362, "rewards_train/1-l": -1.6068679094314575, "rewards_train/1-w": 2.563323497772217, "rewards_train/2-2": 2.062915802001953, "rewards_train/2-w": 1.1485576629638672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.170191407203674, "rewards_train/margins_1": 1.2146021127700806, "rewards_train/margins_2": 0.9143581390380859, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -189.3507843017578, "logps_train/policy_1_l": -174.55567932128906, "logps_train/policy_1_w": -105.71293640136719, "logps_train/policy_2_2": -142.63819885253906, "logps_train/policy_2_w": -143.83509826660156, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.1547659635543823, "rewards_train/1-l": -1.5258800983428955, "rewards_train/1-w": 2.627534866333008, "rewards_train/2-2": 2.911180019378662, "rewards_train/2-w": 1.4602396488189697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.153414964675903, "rewards_train/margins_1": 1.4727689027786255, "rewards_train/margins_2": 1.4509403705596924, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -156.19207763671875, "logps_train/policy_1_l": -184.91680908203125, "logps_train/policy_1_w": -129.58433532714844, "logps_train/policy_2_2": -124.57746124267578, "logps_train/policy_2_w": -160.63919067382812, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.3753232955932617, "rewards_train/1-l": -2.018878936767578, "rewards_train/1-w": 2.247035503387451, "rewards_train/2-2": 2.462566375732422, "rewards_train/2-w": 1.511863112449646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.265914440155029, "rewards_train/margins_1": 0.8717122077941895, "rewards_train/margins_2": 0.9507032632827759, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -178.7205810546875, "logps_train/policy_1_l": -210.2707061767578, "logps_train/policy_1_w": -172.02125549316406, "logps_train/policy_2_2": -140.2340545654297, "logps_train/policy_2_w": -215.23741149902344, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.8373167514801025, "rewards_train/1-l": -1.7880079746246338, "rewards_train/1-w": 3.338498830795288, "rewards_train/2-2": 3.1515941619873047, "rewards_train/2-w": 1.9075095653533936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.126506805419922, "rewards_train/margins_1": 1.5011820793151855, "rewards_train/margins_2": 1.2440845966339111, "step": 328 }, { "epoch": 0.99, "logps_train/policy_1_2": -134.21937561035156, "logps_train/policy_1_l": -129.515380859375, "logps_train/policy_1_w": -128.1868896484375, "logps_train/policy_2_2": -110.95206451416016, "logps_train/policy_2_w": -159.78521728515625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.7929054498672485, "rewards_train/1-l": -1.239722490310669, "rewards_train/1-w": 2.062560558319092, "rewards_train/2-2": 1.426668643951416, "rewards_train/2-w": 0.7402278184890747, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3022830486297607, "rewards_train/margins_1": 1.2696551084518433, "rewards_train/margins_2": 0.6864408254623413, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -157.3193817138672, "logps_train/policy_1_l": -145.15138244628906, "logps_train/policy_1_w": -137.04913330078125, "logps_train/policy_2_2": -122.3943862915039, "logps_train/policy_2_w": -179.27122497558594, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 0.6725543737411499, "rewards_train/1-l": -0.8743181228637695, "rewards_train/1-w": 2.336493730545044, "rewards_train/2-2": 1.6140769720077515, "rewards_train/2-w": 0.37854090332984924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2108118534088135, "rewards_train/margins_1": 1.663939356803894, "rewards_train/margins_2": 1.2355360686779022, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -149.925537109375, "logps_train/policy_1_l": -192.04742431640625, "logps_train/policy_1_w": -126.11323547363281, "logps_train/policy_2_2": -117.9754638671875, "logps_train/policy_2_w": -157.94046020507812, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.1493399143218994, "rewards_train/1-l": -2.2781805992126465, "rewards_train/1-w": 1.7105507850646973, "rewards_train/2-2": 1.9917610883712769, "rewards_train/2-w": 0.6653287410736084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9887313842773438, "rewards_train/margins_1": 0.5612108707427979, "rewards_train/margins_2": 1.3264323472976685, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -204.49038696289062, "logps_train/policy_1_l": -260.6632995605469, "logps_train/policy_1_w": -219.47532653808594, "logps_train/policy_2_2": -179.98220825195312, "logps_train/policy_2_w": -258.1297607421875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.5267415046691895, "rewards_train/1-l": -2.981175422668457, "rewards_train/1-w": 2.8407483100891113, "rewards_train/2-2": 3.3455288410186768, "rewards_train/2-w": 1.4487457275390625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.821923732757568, "rewards_train/margins_1": 0.3140068054199219, "rewards_train/margins_2": 1.8967831134796143, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -229.271484375, "logps_train/policy_1_l": -241.1981964111328, "logps_train/policy_1_w": -159.54766845703125, "logps_train/policy_2_2": -184.447509765625, "logps_train/policy_2_w": -207.036865234375, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.9541015625, "rewards_train/1-l": -2.6916937828063965, "rewards_train/1-w": 2.9952340126037598, "rewards_train/2-2": 3.388843059539795, "rewards_train/2-w": 1.011939287185669, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.686927795410156, "rewards_train/margins_1": 1.0411324501037598, "rewards_train/margins_2": 2.376903772354126, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -117.26979064941406, "logps_train/policy_1_l": -131.1695556640625, "logps_train/policy_1_w": -144.070068359375, "logps_train/policy_2_2": -85.80783081054688, "logps_train/policy_2_w": -180.66546630859375, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 0.6722392439842224, "rewards_train/1-l": -1.7454711198806763, "rewards_train/1-w": 2.1140871047973633, "rewards_train/2-2": 1.7004667520523071, "rewards_train/2-w": 0.6381406188011169, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8595582246780396, "rewards_train/margins_1": 1.4418478608131409, "rewards_train/margins_2": 1.0623261332511902, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -149.7832489013672, "logps_train/policy_1_l": -152.74972534179688, "logps_train/policy_1_w": -200.62930297851562, "logps_train/policy_2_2": -120.86286163330078, "logps_train/policy_2_w": -242.10159301757812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 1.2107374668121338, "rewards_train/1-l": -1.5898163318634033, "rewards_train/1-w": 3.901132345199585, "rewards_train/2-2": 2.2309014797210693, "rewards_train/2-w": 2.278902769088745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.490948677062988, "rewards_train/margins_1": 2.690394878387451, "rewards_train/margins_2": -0.04800128936767578, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -82.60688781738281, "logps_train/policy_1_l": -140.7469482421875, "logps_train/policy_1_w": -129.6311492919922, "logps_train/policy_2_2": -66.64066314697266, "logps_train/policy_2_w": -160.5688018798828, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.6471240520477295, "rewards_train/1-l": -1.5523194074630737, "rewards_train/1-w": 2.9931344985961914, "rewards_train/2-2": 2.0343711376190186, "rewards_train/2-w": 1.494683027267456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.545453906059265, "rewards_train/margins_1": 1.346010446548462, "rewards_train/margins_2": 0.5396881103515625, "step": 329 }, { "epoch": 0.99, "learning_rate": 2.771144194993564e-06, "loss": 0.5514, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -106.78981018066406, "logps_train/policy_1_l": -148.65121459960938, "logps_train/policy_1_w": -166.44027709960938, "logps_train/policy_2_2": -91.29753112792969, "logps_train/policy_2_w": -201.5218963623047, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 0.6223855018615723, "rewards_train/1-l": -1.8557466268539429, "rewards_train/1-w": 2.5028467178344727, "rewards_train/2-2": 1.1082355976104736, "rewards_train/2-w": 1.1665605306625366, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.3585933446884155, "rewards_train/margins_1": 1.8804612159729004, "rewards_train/margins_2": -0.05832493305206299, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -155.26058959960938, "logps_train/policy_1_l": -233.825927734375, "logps_train/policy_1_w": -162.6644287109375, "logps_train/policy_2_2": -119.1642074584961, "logps_train/policy_2_w": -211.40740966796875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.1239399909973145, "rewards_train/1-l": -2.2708730697631836, "rewards_train/1-w": 2.383556604385376, "rewards_train/2-2": 2.0523290634155273, "rewards_train/2-w": 0.6655082106590271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.65442967414856, "rewards_train/margins_1": 1.2596166133880615, "rewards_train/margins_2": 1.3868208527565002, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -139.57284545898438, "logps_train/policy_1_l": -64.68827819824219, "logps_train/policy_1_w": -65.82101440429688, "logps_train/policy_2_2": -113.85350799560547, "logps_train/policy_2_w": -92.07806396484375, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -58.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.0489654541015625, "rewards_train/1-l": -0.6537888646125793, "rewards_train/1-w": 1.3936803340911865, "rewards_train/2-2": 1.8724617958068848, "rewards_train/2-w": 0.6703186631202698, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.047469198703766, "rewards_train/margins_1": 0.344714879989624, "rewards_train/margins_2": 1.202143132686615, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -128.8822021484375, "logps_train/policy_1_l": -126.33242797851562, "logps_train/policy_1_w": -127.19418334960938, "logps_train/policy_2_2": -104.9593734741211, "logps_train/policy_2_w": -158.58572387695312, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.6891238689422607, "rewards_train/1-l": -2.297304630279541, "rewards_train/1-w": 2.9157376289367676, "rewards_train/2-2": 2.1689059734344482, "rewards_train/2-w": 1.9886934757232666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.213042259216309, "rewards_train/margins_1": 1.2266137599945068, "rewards_train/margins_2": 0.18021249771118164, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -143.72108459472656, "logps_train/policy_1_l": -111.12791442871094, "logps_train/policy_1_w": -100.7642822265625, "logps_train/policy_2_2": -120.29144287109375, "logps_train/policy_2_w": -117.77835083007812, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.4575787782669067, "rewards_train/1-l": -1.8124988079071045, "rewards_train/1-w": 2.228259325027466, "rewards_train/2-2": 2.5321836471557617, "rewards_train/2-w": 1.5870089530944824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.04075813293457, "rewards_train/margins_1": 0.7706805467605591, "rewards_train/margins_2": 0.9451746940612793, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -194.94833374023438, "logps_train/policy_1_l": -216.3549346923828, "logps_train/policy_1_w": -167.81430053710938, "logps_train/policy_2_2": -148.31396484375, "logps_train/policy_2_w": -215.3729248046875, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.1350505352020264, "rewards_train/1-l": -3.1128382682800293, "rewards_train/1-w": 2.8382978439331055, "rewards_train/2-2": 2.7160651683807373, "rewards_train/2-w": 0.9423950910568237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.951136112213135, "rewards_train/margins_1": 1.703247308731079, "rewards_train/margins_2": 1.7736700773239136, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -118.23319244384766, "logps_train/policy_1_l": -107.04188537597656, "logps_train/policy_1_w": -130.0994110107422, "logps_train/policy_2_2": -96.53700256347656, "logps_train/policy_2_w": -165.9354248046875, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.118868350982666, "rewards_train/1-l": -1.1620014905929565, "rewards_train/1-w": 2.687324047088623, "rewards_train/2-2": 1.6462023258209229, "rewards_train/2-w": 1.1154414415359497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8493255376815796, "rewards_train/margins_1": 1.568455696105957, "rewards_train/margins_2": 0.5307608842849731, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -126.37993621826172, "logps_train/policy_1_l": -114.63206481933594, "logps_train/policy_1_w": -82.5078353881836, "logps_train/policy_2_2": -95.72994995117188, "logps_train/policy_2_w": -116.34725952148438, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.6862255930900574, "rewards_train/1-l": -0.7430890798568726, "rewards_train/1-w": 2.032028913497925, "rewards_train/2-2": 1.4652864933013916, "rewards_train/2-w": 1.2902741432189941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7751179933547974, "rewards_train/margins_1": 1.3458033204078674, "rewards_train/margins_2": 0.17501235008239746, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -174.51296997070312, "logps_train/policy_1_l": -120.32121276855469, "logps_train/policy_1_w": -90.989990234375, "logps_train/policy_2_2": -138.76016235351562, "logps_train/policy_2_w": -115.64741516113281, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.8112026453018188, "rewards_train/1-l": -1.7655202150344849, "rewards_train/1-w": 1.8353755474090576, "rewards_train/2-2": 2.8989834785461426, "rewards_train/2-w": 1.0469777584075928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6008957624435425, "rewards_train/margins_1": 0.02417290210723877, "rewards_train/margins_2": 1.8520057201385498, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -105.73617553710938, "logps_train/policy_1_l": -85.37757873535156, "logps_train/policy_1_w": -87.70310974121094, "logps_train/policy_2_2": -93.76359558105469, "logps_train/policy_2_w": -110.39631652832031, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 1.1256012916564941, "rewards_train/1-l": -0.8246713280677795, "rewards_train/1-w": 2.0515639781951904, "rewards_train/2-2": 1.6056716442108154, "rewards_train/2-w": 1.6259933710098267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.87623530626297, "rewards_train/margins_1": 0.9259626865386963, "rewards_train/margins_2": -0.02032172679901123, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -145.7338409423828, "logps_train/policy_1_l": -163.19679260253906, "logps_train/policy_1_w": -148.02822875976562, "logps_train/policy_2_2": -124.99959564208984, "logps_train/policy_2_w": -179.95115661621094, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.375053882598877, "rewards_train/1-l": -1.5259296894073486, "rewards_train/1-w": 2.92842698097229, "rewards_train/2-2": 2.2359774112701416, "rewards_train/2-w": 1.8861347436904907, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.454356670379639, "rewards_train/margins_1": 1.553373098373413, "rewards_train/margins_2": 0.3498426675796509, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -192.90304565429688, "logps_train/policy_1_l": -207.6090087890625, "logps_train/policy_1_w": -158.91226196289062, "logps_train/policy_2_2": -170.75428771972656, "logps_train/policy_2_w": -189.45193481445312, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.7768824100494385, "rewards_train/1-l": -2.258167266845703, "rewards_train/1-w": 2.916585922241211, "rewards_train/2-2": 2.676914930343628, "rewards_train/2-w": 1.987619400024414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.174753189086914, "rewards_train/margins_1": 1.1397035121917725, "rewards_train/margins_2": 0.6892955303192139, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -139.84014892578125, "logps_train/policy_1_l": -138.7677001953125, "logps_train/policy_1_w": -170.31944274902344, "logps_train/policy_2_2": -118.37428283691406, "logps_train/policy_2_w": -218.9353485107422, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.4253596067428589, "rewards_train/1-l": -1.442394495010376, "rewards_train/1-w": 2.8383684158325195, "rewards_train/2-2": 2.131321430206299, "rewards_train/2-w": 1.1064653396606445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.2807629108428955, "rewards_train/margins_1": 1.4130088090896606, "rewards_train/margins_2": 1.0248560905456543, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -135.44168090820312, "logps_train/policy_1_l": -145.31199645996094, "logps_train/policy_1_w": -147.48126220703125, "logps_train/policy_2_2": -111.50021362304688, "logps_train/policy_2_w": -176.41046142578125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.8714568614959717, "rewards_train/1-l": -0.5309064388275146, "rewards_train/1-w": 2.3221867084503174, "rewards_train/2-2": 2.682791233062744, "rewards_train/2-w": 1.2667667865753174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.853093147277832, "rewards_train/margins_1": 0.4507298469543457, "rewards_train/margins_2": 1.4160244464874268, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -142.16213989257812, "logps_train/policy_1_l": -149.26788330078125, "logps_train/policy_1_w": -138.9483184814453, "logps_train/policy_2_2": -106.91868591308594, "logps_train/policy_2_w": -167.77951049804688, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.2415984869003296, "rewards_train/1-l": -1.7197562456130981, "rewards_train/1-w": 2.351262331008911, "rewards_train/2-2": 2.259889602661133, "rewards_train/2-w": 1.3808366060256958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.071018576622009, "rewards_train/margins_1": 1.1096638441085815, "rewards_train/margins_2": 0.879052996635437, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -127.43743133544922, "logps_train/policy_1_l": -115.36131286621094, "logps_train/policy_1_w": -91.64734649658203, "logps_train/policy_2_2": -108.16365051269531, "logps_train/policy_2_w": -111.52003479003906, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8916082382202148, "rewards_train/1-l": -0.9443340301513672, "rewards_train/1-w": 1.771593689918518, "rewards_train/2-2": 1.575040578842163, "rewards_train/2-w": 1.0999491214752197, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.7159277200698853, "rewards_train/margins_1": 0.8799854516983032, "rewards_train/margins_2": 0.47509145736694336, "step": 331 }, { "epoch": 0.99, "learning_rate": 2.7465789994882796e-06, "loss": 0.6724, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -100.58050537109375, "logps_train/policy_1_l": -81.64696502685547, "logps_train/policy_1_w": -113.47941589355469, "logps_train/policy_2_2": -85.39246368408203, "logps_train/policy_2_w": -137.29837036132812, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.7912658452987671, "rewards_train/1-l": -0.44047781825065613, "rewards_train/1-w": 2.2090892791748047, "rewards_train/2-2": 1.2234491109848022, "rewards_train/2-w": 1.3092265129089355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.649567097425461, "rewards_train/margins_1": 1.4178234338760376, "rewards_train/margins_2": -0.0857774019241333, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -152.92628479003906, "logps_train/policy_1_l": -140.3627471923828, "logps_train/policy_1_w": -117.71113586425781, "logps_train/policy_2_2": -107.37646484375, "logps_train/policy_2_w": -157.13539123535156, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.8933095932006836, "rewards_train/1-l": -1.7726033926010132, "rewards_train/1-w": 2.5027146339416504, "rewards_train/2-2": 1.9795407056808472, "rewards_train/2-w": 1.3239613771438599, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.275318026542664, "rewards_train/margins_1": 1.6094050407409668, "rewards_train/margins_2": 0.6555793285369873, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -54.974609375, "logps_train/policy_1_l": -91.02246856689453, "logps_train/policy_1_w": -68.37568664550781, "logps_train/policy_2_2": -40.474613189697266, "logps_train/policy_2_w": -90.63932800292969, "logps_train/ref_1_2": -61.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -50.75, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.6005858182907104, "rewards_train/1-l": -1.7696295976638794, "rewards_train/1-w": 1.475907564163208, "rewards_train/2-2": 1.0261714458465576, "rewards_train/2-w": 0.6973953247070312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2455371618270874, "rewards_train/margins_1": 0.8753217458724976, "rewards_train/margins_2": 0.32877612113952637, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -150.77581787109375, "logps_train/policy_1_l": -306.4610595703125, "logps_train/policy_1_w": -184.28794860839844, "logps_train/policy_2_2": -122.75318908691406, "logps_train/policy_2_w": -227.2465362548828, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.8442935943603516, "rewards_train/1-l": -3.6117327213287354, "rewards_train/1-w": 2.9337055683135986, "rewards_train/2-2": 2.4629623889923096, "rewards_train/2-w": 1.7097194194793701, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.545438289642334, "rewards_train/margins_1": 1.089411973953247, "rewards_train/margins_2": 0.7532429695129395, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -174.13528442382812, "logps_train/policy_1_l": -207.58224487304688, "logps_train/policy_1_w": -114.83679962158203, "logps_train/policy_2_2": -141.58615112304688, "logps_train/policy_2_w": -144.3707733154297, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.831784725189209, "rewards_train/1-l": -2.8976783752441406, "rewards_train/1-w": 2.5616326332092285, "rewards_train/2-2": 2.8226351737976074, "rewards_train/2-w": 1.4113597869873047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.459311008453369, "rewards_train/margins_1": 0.7298479080200195, "rewards_train/margins_2": 1.4112753868103027, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -127.73971557617188, "logps_train/policy_1_l": -57.466400146484375, "logps_train/policy_1_w": -48.601619720458984, "logps_train/policy_2_2": -86.2231674194336, "logps_train/policy_2_w": -71.90766906738281, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -49.0, "logps_train/ref_1_w": -62.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.944778323173523, "rewards_train/1-l": -0.8430265188217163, "rewards_train/1-w": 1.3317323923110962, "rewards_train/2-2": 2.296433210372925, "rewards_train/2-w": 0.7926312685012817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.1747589111328125, "rewards_train/margins_1": 0.38695406913757324, "rewards_train/margins_2": 1.503801941871643, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -106.46654510498047, "logps_train/policy_1_l": -156.83428955078125, "logps_train/policy_1_w": -140.00552368164062, "logps_train/policy_2_2": -86.5515365600586, "logps_train/policy_2_w": -175.14743041992188, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.0345953702926636, "rewards_train/1-l": -2.396124839782715, "rewards_train/1-w": 2.3119468688964844, "rewards_train/2-2": 1.569846510887146, "rewards_train/2-w": 1.1930687427520752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.708071708679199, "rewards_train/margins_1": 1.2773514986038208, "rewards_train/margins_2": 0.3767777681350708, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -65.18317413330078, "logps_train/policy_1_l": -69.82592010498047, "logps_train/policy_1_w": -34.70880889892578, "logps_train/policy_2_2": -47.941829681396484, "logps_train/policy_2_w": -48.74714279174805, "logps_train/ref_1_2": -72.5, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -45.25, "logps_train/ref_2_2": -62.5, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 0.737932562828064, "rewards_train/1-l": -1.7478265762329102, "rewards_train/1-w": 1.0572443008422852, "rewards_train/2-2": 1.4612858295440674, "rewards_train/2-w": 0.6479417085647583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8050708770751953, "rewards_train/margins_1": 0.3193117380142212, "rewards_train/margins_2": 0.8133441209793091, "step": 332 }, { "epoch": 1.0, "logps_train/policy_1_2": -159.22140502929688, "logps_train/policy_1_l": -266.9794921875, "logps_train/policy_1_w": -193.79086303710938, "logps_train/policy_2_2": -135.20608520507812, "logps_train/policy_2_w": -234.71820068359375, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 1.964578628540039, "rewards_train/1-l": -3.365135669708252, "rewards_train/1-w": 3.1709141731262207, "rewards_train/2-2": 2.7403292655944824, "rewards_train/2-w": 1.6000535488128662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.536049842834473, "rewards_train/margins_1": 1.2063355445861816, "rewards_train/margins_2": 1.1402757167816162, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -131.1813201904297, "logps_train/policy_1_l": -216.5028076171875, "logps_train/policy_1_w": -155.88648986816406, "logps_train/policy_2_2": -105.38996887207031, "logps_train/policy_2_w": -196.51039123535156, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.61624276638031, "rewards_train/1-l": -2.1502816677093506, "rewards_train/1-w": 2.2988505363464355, "rewards_train/2-2": 2.018815279006958, "rewards_train/2-w": 0.8520864248275757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.449132204055786, "rewards_train/margins_1": 0.6826077699661255, "rewards_train/margins_2": 1.1667288541793823, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -180.14776611328125, "logps_train/policy_1_l": -120.36198425292969, "logps_train/policy_1_w": -91.33811950683594, "logps_train/policy_2_2": -145.39773559570312, "logps_train/policy_2_w": -104.67068481445312, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -123.5, "rewards_train/1-2": 0.753972053527832, "rewards_train/1-l": -0.8287771940231323, "rewards_train/1-w": 2.2806406021118164, "rewards_train/2-2": 2.0860061645507812, "rewards_train/2-w": 1.8786344528198242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.1094177961349487, "rewards_train/margins_1": 1.5266685485839844, "rewards_train/margins_2": 0.20737171173095703, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -171.75079345703125, "logps_train/policy_1_l": -214.39498901367188, "logps_train/policy_1_w": -100.4843978881836, "logps_train/policy_2_2": -122.24273681640625, "logps_train/policy_2_w": -147.00999450683594, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.0428886413574219, "rewards_train/1-l": -2.5780985355377197, "rewards_train/1-w": 2.5062482357025146, "rewards_train/2-2": 2.4327573776245117, "rewards_train/2-w": 1.4661877155303955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.084346771240234, "rewards_train/margins_1": 1.4633595943450928, "rewards_train/margins_2": 0.9665696620941162, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -129.3525390625, "logps_train/policy_1_l": -119.76693725585938, "logps_train/policy_1_w": -104.39218139648438, "logps_train/policy_2_2": -104.36048889160156, "logps_train/policy_2_w": -135.8978271484375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6022460460662842, "rewards_train/1-l": -0.6977875828742981, "rewards_train/1-w": 2.074209213256836, "rewards_train/2-2": 2.2670764923095703, "rewards_train/2-w": 1.2615841627120972, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.771996796131134, "rewards_train/margins_1": 0.47196316719055176, "rewards_train/margins_2": 1.0054923295974731, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -205.86021423339844, "logps_train/policy_1_l": -195.8751220703125, "logps_train/policy_1_w": -167.0345458984375, "logps_train/policy_2_2": -158.72914123535156, "logps_train/policy_2_w": -214.55885314941406, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.345228672027588, "rewards_train/1-l": -2.3687639236450195, "rewards_train/1-w": 3.146544933319092, "rewards_train/2-2": 2.9270858764648438, "rewards_train/2-w": 1.9316142797470093, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.515308856964111, "rewards_train/margins_1": 1.801316261291504, "rewards_train/margins_2": 0.9954715967178345, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -140.12347412109375, "logps_train/policy_1_l": -102.58350372314453, "logps_train/policy_1_w": -73.89561462402344, "logps_train/policy_2_2": -100.75355529785156, "logps_train/policy_2_w": -109.2674560546875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.480231523513794, "rewards_train/1-l": -1.1230968236923218, "rewards_train/1-w": 1.8617076873779297, "rewards_train/2-2": 2.6676125526428223, "rewards_train/2-w": 0.8867306709289551, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9848045110702515, "rewards_train/margins_1": 0.38147616386413574, "rewards_train/margins_2": 1.7808818817138672, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -168.08985900878906, "logps_train/policy_1_l": -227.50735473632812, "logps_train/policy_1_w": -133.62698364257812, "logps_train/policy_2_2": -139.8954315185547, "logps_train/policy_2_w": -167.79547119140625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.4019510746002197, "rewards_train/1-l": -2.9917521476745605, "rewards_train/1-w": 2.354489326477051, "rewards_train/2-2": 2.3407299518585205, "rewards_train/2-w": 1.204826831817627, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.346241474151611, "rewards_train/margins_1": 0.952538251876831, "rewards_train/margins_2": 1.1359031200408936, "step": 333 }, { "epoch": 1.0, "learning_rate": 2.721989738307337e-06, "loss": 0.6523, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -211.26864624023438, "logps_train/policy_1_l": -219.44326782226562, "logps_train/policy_1_w": -165.05764770507812, "logps_train/policy_2_2": -170.48834228515625, "logps_train/policy_2_w": -199.60675048828125, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8793855905532837, "rewards_train/1-l": -2.3568270206451416, "rewards_train/1-w": 2.703221082687378, "rewards_train/2-2": 3.319916009902954, "rewards_train/2-w": 1.7268242835998535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0600481033325195, "rewards_train/margins_1": 0.8238354921340942, "rewards_train/margins_2": 1.5930917263031006, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -135.24229431152344, "logps_train/policy_1_l": -163.0045928955078, "logps_train/policy_1_w": -128.23223876953125, "logps_train/policy_2_2": -122.15438842773438, "logps_train/policy_2_w": -152.14633178710938, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 2.1835827827453613, "rewards_train/1-l": -1.374384880065918, "rewards_train/1-w": 2.44630765914917, "rewards_train/2-2": 2.3939366340637207, "rewards_train/2-w": 1.5947422981262207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.820692539215088, "rewards_train/margins_1": 0.2627248764038086, "rewards_train/margins_2": 0.7991943359375, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -156.4681396484375, "logps_train/policy_1_l": -150.9161834716797, "logps_train/policy_1_w": -114.545654296875, "logps_train/policy_2_2": -129.56161499023438, "logps_train/policy_2_w": -153.69241333007812, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.745373010635376, "rewards_train/1-l": -1.2046072483062744, "rewards_train/1-w": 2.895435333251953, "rewards_train/2-2": 2.4977455139160156, "rewards_train/2-w": 1.488572597503662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1000425815582275, "rewards_train/margins_1": 1.1500623226165771, "rewards_train/margins_2": 1.0091729164123535, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -96.62186431884766, "logps_train/policy_1_l": -104.16616821289062, "logps_train/policy_1_w": -72.25179290771484, "logps_train/policy_2_2": -77.50951385498047, "logps_train/policy_2_w": -97.82003784179688, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.9878138303756714, "rewards_train/1-l": -1.4002103805541992, "rewards_train/1-w": 2.0099775791168213, "rewards_train/2-2": 1.5896739959716797, "rewards_train/2-w": 1.3285431861877441, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.4101879596710205, "rewards_train/margins_1": 1.02216374874115, "rewards_train/margins_2": 0.26113080978393555, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -103.4089584350586, "logps_train/policy_1_l": -126.96353149414062, "logps_train/policy_1_w": -125.75209045410156, "logps_train/policy_2_2": -83.03707885742188, "logps_train/policy_2_w": -154.77862548828125, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.3020731210708618, "rewards_train/1-l": -0.995572566986084, "rewards_train/1-w": 2.4029159545898438, "rewards_train/2-2": 1.8541042804718018, "rewards_train/2-w": 1.4760433435440063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3984885215759277, "rewards_train/margins_1": 1.100842833518982, "rewards_train/margins_2": 0.3780609369277954, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -164.53916931152344, "logps_train/policy_1_l": -213.35888671875, "logps_train/policy_1_w": -120.05722045898438, "logps_train/policy_2_2": -139.15478515625, "logps_train/policy_2_w": -147.83377075195312, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.782020092010498, "rewards_train/1-l": -2.959911584854126, "rewards_train/1-w": 3.26615309715271, "rewards_train/2-2": 2.759521722793579, "rewards_train/2-w": 2.218576192855835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.226064682006836, "rewards_train/margins_1": 1.484133005142212, "rewards_train/margins_2": 0.5409455299377441, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -123.17195129394531, "logps_train/policy_1_l": -163.5797119140625, "logps_train/policy_1_w": -156.954833984375, "logps_train/policy_2_2": -99.41358947753906, "logps_train/policy_2_w": -191.059326171875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.651555061340332, "rewards_train/1-l": -1.899963617324829, "rewards_train/1-w": 2.8240485191345215, "rewards_train/2-2": 2.1773905754089355, "rewards_train/2-w": 1.1393790245056152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.724012136459351, "rewards_train/margins_1": 1.1724934577941895, "rewards_train/margins_2": 1.0380115509033203, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -102.85477447509766, "logps_train/policy_1_l": -140.74757385253906, "logps_train/policy_1_w": -97.87399291992188, "logps_train/policy_2_2": -74.62678527832031, "logps_train/policy_2_w": -135.17465209960938, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.100460410118103, "rewards_train/1-l": -1.7284189462661743, "rewards_train/1-w": 2.179007053375244, "rewards_train/2-2": 1.7670092582702637, "rewards_train/2-w": 1.0387839078903198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9074259996414185, "rewards_train/margins_1": 1.0785466432571411, "rewards_train/margins_2": 0.7282253503799438, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -100.86329650878906, "logps_train/policy_1_l": -137.7821502685547, "logps_train/policy_1_w": -130.16761779785156, "logps_train/policy_2_2": -88.87252044677734, "logps_train/policy_2_w": -159.8109130859375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.7289056777954102, "rewards_train/1-l": -1.2551689147949219, "rewards_train/1-w": 2.865269899368286, "rewards_train/2-2": 2.1250529289245605, "rewards_train/2-w": 1.6431279182434082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.120438814163208, "rewards_train/margins_1": 1.136364221572876, "rewards_train/margins_2": 0.48192501068115234, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -180.615478515625, "logps_train/policy_1_l": -236.96424865722656, "logps_train/policy_1_w": -130.234375, "logps_train/policy_2_2": -145.2742919921875, "logps_train/policy_2_w": -176.62677001953125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.0048582553863525, "rewards_train/1-l": -3.114295482635498, "rewards_train/1-w": 2.6927738189697266, "rewards_train/2-2": 3.280188798904419, "rewards_train/2-w": 1.1534371376037598, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.807069301605225, "rewards_train/margins_1": 0.687915563583374, "rewards_train/margins_2": 2.126751661300659, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -124.2298812866211, "logps_train/policy_1_l": -110.2125015258789, "logps_train/policy_1_w": -84.01478576660156, "logps_train/policy_2_2": -97.03318786621094, "logps_train/policy_2_w": -115.08110809326172, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9666606187820435, "rewards_train/1-l": -1.6124613285064697, "rewards_train/1-w": 2.9000840187072754, "rewards_train/2-2": 1.693751335144043, "rewards_train/2-w": 1.7153265476226807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.125, "rewards_train/margins": 4.512545347213745, "rewards_train/margins_1": 1.933423399925232, "rewards_train/margins_2": -0.021575212478637695, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -141.01702880859375, "logps_train/policy_1_l": -165.37924194335938, "logps_train/policy_1_w": -77.63667297363281, "logps_train/policy_2_2": -113.02630615234375, "logps_train/policy_2_w": -99.45346069335938, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.3779834508895874, "rewards_train/1-l": -1.7991557121276855, "rewards_train/1-w": 1.8371140956878662, "rewards_train/2-2": 2.23799467086792, "rewards_train/2-w": 1.1577787399291992, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6362698078155518, "rewards_train/margins_1": 0.4591306447982788, "rewards_train/margins_2": 1.0802159309387207, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -179.49319458007812, "logps_train/policy_1_l": -139.98699951171875, "logps_train/policy_1_w": -114.94976806640625, "logps_train/policy_2_2": -155.66578674316406, "logps_train/policy_2_w": -135.87921142578125, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 2.3006811141967773, "rewards_train/1-l": -1.183856725692749, "rewards_train/1-w": 3.10892915725708, "rewards_train/2-2": 3.2994368076324463, "rewards_train/2-w": 2.710516929626465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.292785882949829, "rewards_train/margins_1": 0.8082480430603027, "rewards_train/margins_2": 0.5889198780059814, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -215.7733154296875, "logps_train/policy_1_l": -234.47369384765625, "logps_train/policy_1_w": -108.91455841064453, "logps_train/policy_2_2": -170.82818603515625, "logps_train/policy_2_w": -140.58177185058594, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.024231433868408, "rewards_train/1-l": -3.0032284259796143, "rewards_train/1-w": 2.807762622833252, "rewards_train/2-2": 3.54921293258667, "rewards_train/2-w": 1.900416374206543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.810991048812866, "rewards_train/margins_1": 0.7835311889648438, "rewards_train/margins_2": 1.648796558380127, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -123.41968536376953, "logps_train/policy_1_l": -158.1240234375, "logps_train/policy_1_w": -161.91868591308594, "logps_train/policy_2_2": -95.37525939941406, "logps_train/policy_2_w": -205.42471313476562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.5064690113067627, "rewards_train/1-l": -1.5305675268173218, "rewards_train/1-w": 3.1659436225891113, "rewards_train/2-2": 2.650169610977173, "rewards_train/2-w": 1.7215907573699951, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.696511149406433, "rewards_train/margins_1": 1.6594746112823486, "rewards_train/margins_2": 0.9285788536071777, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -141.12646484375, "logps_train/policy_1_l": -149.391845703125, "logps_train/policy_1_w": -119.39166259765625, "logps_train/policy_2_2": -121.13134002685547, "logps_train/policy_2_w": -139.5102081298828, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.6478996276855469, "rewards_train/1-l": -1.5684823989868164, "rewards_train/1-w": 2.2299742698669434, "rewards_train/2-2": 2.2345221042633057, "rewards_train/2-w": 1.6251513957977295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7984566688537598, "rewards_train/margins_1": 0.5820746421813965, "rewards_train/margins_2": 0.6093707084655762, "step": 335 }, { "epoch": 1.01, "learning_rate": 2.69737881131928e-06, "loss": 0.6408, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -167.78457641601562, "logps_train/policy_1_l": -290.0144348144531, "logps_train/policy_1_w": -133.5522918701172, "logps_train/policy_2_2": -134.75872802734375, "logps_train/policy_2_w": -167.09942626953125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.7871677875518799, "rewards_train/1-l": -4.031130790710449, "rewards_train/1-w": 3.5728957653045654, "rewards_train/2-2": 2.87178373336792, "rewards_train/2-w": 2.7541208267211914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.604026556015015, "rewards_train/margins_1": 1.7857279777526855, "rewards_train/margins_2": 0.11766290664672852, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -83.88449096679688, "logps_train/policy_1_l": -68.60582733154297, "logps_train/policy_1_w": -82.2409439086914, "logps_train/policy_2_2": -61.50749588012695, "logps_train/policy_2_w": -102.34857177734375, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -61.25, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 1.2295196056365967, "rewards_train/1-l": -0.7203484177589417, "rewards_train/1-w": 2.1868436336517334, "rewards_train/2-2": 2.0937819480895996, "rewards_train/2-w": 1.2815496921539307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.907192051410675, "rewards_train/margins_1": 0.9573240280151367, "rewards_train/margins_2": 0.812232255935669, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -220.6164093017578, "logps_train/policy_1_l": -172.66583251953125, "logps_train/policy_1_w": -181.37803649902344, "logps_train/policy_2_2": -186.8274383544922, "logps_train/policy_2_w": -222.49044799804688, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.9016398191452026, "rewards_train/1-l": -1.6618945598602295, "rewards_train/1-w": 4.168445587158203, "rewards_train/2-2": 3.240694046020508, "rewards_train/2-w": 3.1259543895721436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.830340147018433, "rewards_train/margins_1": 2.2668057680130005, "rewards_train/margins_2": 0.11473965644836426, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -160.576904296875, "logps_train/policy_1_l": -218.8889923095703, "logps_train/policy_1_w": -98.30841827392578, "logps_train/policy_2_2": -122.06163024902344, "logps_train/policy_2_w": -129.5426025390625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.968870997428894, "rewards_train/1-l": -2.1896800994873047, "rewards_train/1-w": 2.1562674045562744, "rewards_train/2-2": 2.3176658153533936, "rewards_train/2-w": 1.141833782196045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.345947504043579, "rewards_train/margins_1": 1.1873964071273804, "rewards_train/margins_2": 1.1758320331573486, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -221.22703552246094, "logps_train/policy_1_l": -181.54635620117188, "logps_train/policy_1_w": -203.90310668945312, "logps_train/policy_2_2": -167.1774139404297, "logps_train/policy_2_w": -246.1958465576172, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.911670207977295, "rewards_train/1-l": -1.5480928421020508, "rewards_train/1-w": 2.476876735687256, "rewards_train/2-2": 3.8416342735290527, "rewards_train/2-w": 1.0226032733917236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.024969577789307, "rewards_train/margins_1": 0.5652065277099609, "rewards_train/margins_2": 2.819031000137329, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -166.41685485839844, "logps_train/policy_1_l": -226.39755249023438, "logps_train/policy_1_w": -128.98683166503906, "logps_train/policy_2_2": -131.06597900390625, "logps_train/policy_2_w": -167.835693359375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.577845573425293, "rewards_train/1-l": -2.091318130493164, "rewards_train/1-w": 2.906003952026367, "rewards_train/2-2": 2.726604700088501, "rewards_train/2-w": 1.9922109842300415, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.997322082519531, "rewards_train/margins_1": 1.3281583786010742, "rewards_train/margins_2": 0.7343937158584595, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -181.15408325195312, "logps_train/policy_1_l": -180.52810668945312, "logps_train/policy_1_w": -140.04290771484375, "logps_train/policy_2_2": -149.92283630371094, "logps_train/policy_2_w": -163.0769500732422, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 2.145529270172119, "rewards_train/1-l": -0.9653103947639465, "rewards_train/1-w": 2.5664126873016357, "rewards_train/2-2": 3.1475601196289062, "rewards_train/2-w": 1.9938678741455078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5317230820655823, "rewards_train/margins_1": 0.4208834171295166, "rewards_train/margins_2": 1.1536922454833984, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -179.69664001464844, "logps_train/policy_1_l": -198.03497314453125, "logps_train/policy_1_w": -165.63772583007812, "logps_train/policy_2_2": -144.28683471679688, "logps_train/policy_2_w": -206.86752319335938, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.7928359508514404, "rewards_train/1-l": -2.0674381256103516, "rewards_train/1-w": 3.717477560043335, "rewards_train/2-2": 2.824441909790039, "rewards_train/2-w": 2.2663726806640625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.7849156856536865, "rewards_train/margins_1": 1.9246416091918945, "rewards_train/margins_2": 0.5580692291259766, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -159.3336944580078, "logps_train/policy_1_l": -153.38839721679688, "logps_train/policy_1_w": -98.72813415527344, "logps_train/policy_2_2": -134.03529357910156, "logps_train/policy_2_w": -133.93136596679688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.810380458831787, "rewards_train/1-l": -1.865793228149414, "rewards_train/1-w": 2.661756753921509, "rewards_train/2-2": 2.8238139152526855, "rewards_train/2-w": 1.549050211906433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.527549982070923, "rewards_train/margins_1": 0.8513762950897217, "rewards_train/margins_2": 1.2747637033462524, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -147.24981689453125, "logps_train/policy_1_l": -107.51456451416016, "logps_train/policy_1_w": -113.96533203125, "logps_train/policy_2_2": -113.06747436523438, "logps_train/policy_2_w": -153.3358154296875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.503143548965454, "rewards_train/1-l": -1.168252944946289, "rewards_train/1-w": 2.589404582977295, "rewards_train/2-2": 2.904971122741699, "rewards_train/2-w": 1.060168981552124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.757657527923584, "rewards_train/margins_1": 1.0862610340118408, "rewards_train/margins_2": 1.8448021411895752, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -169.54095458984375, "logps_train/policy_1_l": -124.58341217041016, "logps_train/policy_1_w": -108.25764465332031, "logps_train/policy_2_2": -135.0612335205078, "logps_train/policy_2_w": -149.41920471191406, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.5599675178527832, "rewards_train/1-l": -0.9058997631072998, "rewards_train/1-w": 2.0485520362854004, "rewards_train/2-2": 2.722782611846924, "rewards_train/2-w": 0.870578944683075, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9544517993927, "rewards_train/margins_1": 0.4885845184326172, "rewards_train/margins_2": 1.8522036671638489, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -173.40408325195312, "logps_train/policy_1_l": -160.74099731445312, "logps_train/policy_1_w": -109.71098327636719, "logps_train/policy_2_2": -145.9521484375, "logps_train/policy_2_w": -136.80661010742188, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.3674025535583496, "rewards_train/1-l": -2.0553503036499023, "rewards_train/1-w": 3.3183553218841553, "rewards_train/2-2": 3.3516597747802734, "rewards_train/2-w": 2.3802762031555176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.373705625534058, "rewards_train/margins_1": 0.9509527683258057, "rewards_train/margins_2": 0.9713835716247559, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -69.4909439086914, "logps_train/policy_1_l": -114.55866241455078, "logps_train/policy_1_w": -102.60452270507812, "logps_train/policy_2_2": -53.84362030029297, "logps_train/policy_2_w": -129.0014190673828, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.4454365968704224, "rewards_train/1-l": -1.328521966934204, "rewards_train/1-w": 2.237203598022461, "rewards_train/2-2": 1.9226690530776978, "rewards_train/2-w": 1.2678258419036865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.565725564956665, "rewards_train/margins_1": 0.7917670011520386, "rewards_train/margins_2": 0.6548432111740112, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -108.07490539550781, "logps_train/policy_1_l": -127.76934814453125, "logps_train/policy_1_w": -88.30413818359375, "logps_train/policy_2_2": -79.38914489746094, "logps_train/policy_2_w": -117.24612426757812, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.7581340074539185, "rewards_train/1-l": -1.4542784690856934, "rewards_train/1-w": 1.9471253156661987, "rewards_train/2-2": 1.6716320514678955, "rewards_train/2-w": 1.2363247871398926, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.401403784751892, "rewards_train/margins_1": 1.1889913082122803, "rewards_train/margins_2": 0.43530726432800293, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -199.39791870117188, "logps_train/policy_1_l": -214.36868286132812, "logps_train/policy_1_w": -153.60057067871094, "logps_train/policy_2_2": -145.3643341064453, "logps_train/policy_2_w": -210.28240966796875, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.4500510692596436, "rewards_train/1-l": -2.5280797481536865, "rewards_train/1-w": 3.177833080291748, "rewards_train/2-2": 3.4143476486206055, "rewards_train/2-w": 1.227423906326294, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.705912828445435, "rewards_train/margins_1": 1.7277820110321045, "rewards_train/margins_2": 2.1869237422943115, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -158.70565795898438, "logps_train/policy_1_l": -129.86892700195312, "logps_train/policy_1_w": -114.26197052001953, "logps_train/policy_2_2": -119.9407958984375, "logps_train/policy_2_w": -162.91128540039062, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.5778710842132568, "rewards_train/1-l": -1.2657983303070068, "rewards_train/1-w": 2.8769285678863525, "rewards_train/2-2": 2.587952136993408, "rewards_train/2-w": 1.2776215076446533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.142726898193359, "rewards_train/margins_1": 1.2990574836730957, "rewards_train/margins_2": 1.3103306293487549, "step": 337 }, { "epoch": 1.01, "learning_rate": 2.672748620507195e-06, "loss": 0.5627, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -235.19566345214844, "logps_train/policy_1_l": -301.3889465332031, "logps_train/policy_1_w": -163.8587646484375, "logps_train/policy_2_2": -186.49920654296875, "logps_train/policy_2_w": -229.32858276367188, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 2.3525538444519043, "rewards_train/1-l": -2.976003885269165, "rewards_train/1-w": 4.332091331481934, "rewards_train/2-2": 3.9688305854797363, "rewards_train/2-w": 2.072611093521118, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.308095216751099, "rewards_train/margins_1": 1.9795374870300293, "rewards_train/margins_2": 1.8962194919586182, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -161.9898223876953, "logps_train/policy_1_l": -114.66281127929688, "logps_train/policy_1_w": -130.25558471679688, "logps_train/policy_2_2": -126.03730773925781, "logps_train/policy_2_w": -178.5918426513672, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.1410572528839111, "rewards_train/1-l": -1.1326875686645508, "rewards_train/1-w": 3.01194167137146, "rewards_train/2-2": 2.2400193214416504, "rewards_train/2-w": 1.9814410209655762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.144629240036011, "rewards_train/margins_1": 1.8708844184875488, "rewards_train/margins_2": 0.2585783004760742, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -133.33218383789062, "logps_train/policy_1_l": -120.2933578491211, "logps_train/policy_1_w": -92.54931640625, "logps_train/policy_2_2": -104.04183197021484, "logps_train/policy_2_w": -122.93818664550781, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.633969783782959, "rewards_train/1-l": -1.1609764099121094, "rewards_train/1-w": 2.06772518157959, "rewards_train/2-2": 2.542301654815674, "rewards_train/2-w": 1.2968065738677979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.228701591491699, "rewards_train/margins_1": 0.43375539779663086, "rewards_train/margins_2": 1.245495080947876, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -147.3921661376953, "logps_train/policy_1_l": -212.53524780273438, "logps_train/policy_1_w": -141.6759033203125, "logps_train/policy_2_2": -116.1829833984375, "logps_train/policy_2_w": -171.89959716796875, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 2.1764087677001953, "rewards_train/1-l": -2.963679790496826, "rewards_train/1-w": 2.951158046722412, "rewards_train/2-2": 2.96881103515625, "rewards_train/2-w": 1.9194152355194092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.914837837219238, "rewards_train/margins_1": 0.7747492790222168, "rewards_train/margins_2": 1.0493957996368408, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -130.85848999023438, "logps_train/policy_1_l": -141.14572143554688, "logps_train/policy_1_w": -75.33212280273438, "logps_train/policy_2_2": -93.81278228759766, "logps_train/policy_2_w": -115.40704345703125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.9149324297904968, "rewards_train/1-l": -1.6833223104476929, "rewards_train/1-w": 1.6386630535125732, "rewards_train/2-2": 2.090987205505371, "rewards_train/2-w": 0.7358576655387878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.321985363960266, "rewards_train/margins_1": 0.7237306237220764, "rewards_train/margins_2": 1.3551295399665833, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -117.19239807128906, "logps_train/policy_1_l": -127.24604797363281, "logps_train/policy_1_w": -139.12652587890625, "logps_train/policy_2_2": -89.48908233642578, "logps_train/policy_2_w": -174.5882568359375, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.0985825061798096, "rewards_train/1-l": -1.118159294128418, "rewards_train/1-w": 3.00297212600708, "rewards_train/2-2": 2.0142760276794434, "rewards_train/2-w": 1.752112865447998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.121131420135498, "rewards_train/margins_1": 1.9043896198272705, "rewards_train/margins_2": 0.2621631622314453, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -142.283447265625, "logps_train/policy_1_l": -141.6510772705078, "logps_train/policy_1_w": -80.46839904785156, "logps_train/policy_2_2": -118.90293884277344, "logps_train/policy_2_w": -105.47419738769531, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.8247798681259155, "rewards_train/1-l": -0.9391797780990601, "rewards_train/1-w": 2.727379322052002, "rewards_train/2-2": 2.8815817832946777, "rewards_train/2-w": 2.0213303565979004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.666559100151062, "rewards_train/margins_1": 0.9025994539260864, "rewards_train/margins_2": 0.8602514266967773, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -155.75094604492188, "logps_train/policy_1_l": -87.96354675292969, "logps_train/policy_1_w": -91.39425659179688, "logps_train/policy_2_2": -114.13562774658203, "logps_train/policy_2_w": -135.26351928710938, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.8897489309310913, "rewards_train/1-l": -1.5287764072418213, "rewards_train/1-w": 2.878152370452881, "rewards_train/2-2": 2.89971923828125, "rewards_train/2-w": 1.13614821434021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.406928777694702, "rewards_train/margins_1": 1.9884034395217896, "rewards_train/margins_2": 1.76357102394104, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -89.01840209960938, "logps_train/policy_1_l": -80.88128662109375, "logps_train/policy_1_w": -84.53160858154297, "logps_train/policy_2_2": -72.98300170898438, "logps_train/policy_2_w": -105.09053039550781, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.2173006534576416, "rewards_train/1-l": -1.142717719078064, "rewards_train/1-w": 2.7448854446411133, "rewards_train/2-2": 1.743496298789978, "rewards_train/2-w": 1.9573523998260498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8876031637191772, "rewards_train/margins_1": 1.5275847911834717, "rewards_train/margins_2": -0.21385610103607178, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -119.37138366699219, "logps_train/policy_1_l": -161.08811950683594, "logps_train/policy_1_w": -170.43650817871094, "logps_train/policy_2_2": -100.32572174072266, "logps_train/policy_2_w": -206.59788513183594, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.22067391872406, "rewards_train/1-l": -1.7674057483673096, "rewards_train/1-w": 2.175684928894043, "rewards_train/2-2": 1.7588340044021606, "rewards_train/2-w": 0.8550557494163513, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9430906772613525, "rewards_train/margins_1": 0.9550110101699829, "rewards_train/margins_2": 0.9037782549858093, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -137.50799560546875, "logps_train/policy_1_l": -96.62256622314453, "logps_train/policy_1_w": -80.87228393554688, "logps_train/policy_2_2": -96.68247985839844, "logps_train/policy_2_w": -121.50189208984375, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.0601377487182617, "rewards_train/1-l": -1.1134283542633057, "rewards_train/1-w": 2.3846466541290283, "rewards_train/2-2": 2.5731582641601562, "rewards_train/2-w": 1.5123109817504883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.498075008392334, "rewards_train/margins_1": 1.3245089054107666, "rewards_train/margins_2": 1.060847282409668, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -278.56512451171875, "logps_train/policy_1_l": -310.05999755859375, "logps_train/policy_1_w": -151.7566680908203, "logps_train/policy_2_2": -230.85507202148438, "logps_train/policy_2_w": -211.21539306640625, "logps_train/ref_1_2": -304.0, "logps_train/ref_1_l": -268.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -272.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.530989170074463, "rewards_train/1-l": -4.287250518798828, "rewards_train/1-w": 4.405583381652832, "rewards_train/2-2": 4.2301177978515625, "rewards_train/2-w": 2.865959644317627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.69283390045166, "rewards_train/margins_1": 1.8745942115783691, "rewards_train/margins_2": 1.3641581535339355, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -195.65020751953125, "logps_train/policy_1_l": -179.95050048828125, "logps_train/policy_1_w": -160.48574829101562, "logps_train/policy_2_2": -151.59075927734375, "logps_train/policy_2_w": -204.609375, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.2459161281585693, "rewards_train/1-l": -1.8841124773025513, "rewards_train/1-w": 3.9889259338378906, "rewards_train/2-2": 3.6252999305725098, "rewards_train/2-w": 2.3500003814697266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.873038411140442, "rewards_train/margins_1": 1.7430098056793213, "rewards_train/margins_2": 1.2752995491027832, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -189.77322387695312, "logps_train/policy_1_l": -231.34268188476562, "logps_train/policy_1_w": -176.2144775390625, "logps_train/policy_2_2": -161.89674377441406, "logps_train/policy_2_w": -204.12344360351562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.8195527791976929, "rewards_train/1-l": -3.0233311653137207, "rewards_train/1-w": 3.066051959991455, "rewards_train/2-2": 2.9478254318237305, "rewards_train/2-w": 2.025155544281006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.089383125305176, "rewards_train/margins_1": 1.2464991807937622, "rewards_train/margins_2": 0.9226698875427246, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -70.15221405029297, "logps_train/policy_1_l": -103.17218017578125, "logps_train/policy_1_w": -72.39338684082031, "logps_train/policy_2_2": -62.5826301574707, "logps_train/policy_2_w": -80.85702514648438, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.5871226787567139, "rewards_train/1-l": -1.42903470993042, "rewards_train/1-w": 1.7255057096481323, "rewards_train/2-2": 1.813025712966919, "rewards_train/2-w": 1.409609317779541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1545404195785522, "rewards_train/margins_1": 0.13838303089141846, "rewards_train/margins_2": 0.40341639518737793, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -126.86690521240234, "logps_train/policy_1_l": -151.1028594970703, "logps_train/policy_1_w": -129.20083618164062, "logps_train/policy_2_2": -105.58145141601562, "logps_train/policy_2_w": -164.64126586914062, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.06096613407135, "rewards_train/1-l": -1.6719071865081787, "rewards_train/1-w": 2.662729501724243, "rewards_train/2-2": 1.7668547630310059, "rewards_train/2-w": 1.8061859607696533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.334636688232422, "rewards_train/margins_1": 1.601763367652893, "rewards_train/margins_2": -0.03933119773864746, "step": 339 }, { "epoch": 1.02, "learning_rate": 2.648101569734286e-06, "loss": 0.5498, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -136.3957977294922, "logps_train/policy_1_l": -103.37403869628906, "logps_train/policy_1_w": -99.12721252441406, "logps_train/policy_2_2": -111.65910339355469, "logps_train/policy_2_w": -125.85737609863281, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.86432683467865, "rewards_train/1-l": -0.7651379704475403, "rewards_train/1-w": 2.3279035091400146, "rewards_train/2-2": 2.652839183807373, "rewards_train/2-w": 1.4666059017181396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.093041479587555, "rewards_train/margins_1": 0.46357667446136475, "rewards_train/margins_2": 1.1862332820892334, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -109.31352233886719, "logps_train/policy_1_l": -109.80610656738281, "logps_train/policy_1_w": -81.25991821289062, "logps_train/policy_2_2": -83.43048858642578, "logps_train/policy_2_w": -100.21133422851562, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.1327104568481445, "rewards_train/1-l": -1.81977117061615, "rewards_train/1-w": 2.3333840370178223, "rewards_train/2-2": 2.065544843673706, "rewards_train/2-w": 1.7874599695205688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.153155207633972, "rewards_train/margins_1": 1.2006735801696777, "rewards_train/margins_2": 0.2780848741531372, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -177.10980224609375, "logps_train/policy_1_l": -112.89785766601562, "logps_train/policy_1_w": -82.76007080078125, "logps_train/policy_2_2": -132.0108184814453, "logps_train/policy_2_w": -117.61471557617188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 0.930816650390625, "rewards_train/1-l": -1.0843174457550049, "rewards_train/1-w": 1.9325871467590332, "rewards_train/2-2": 2.9649338722229004, "rewards_train/2-w": 0.9913601279258728, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.016904592514038, "rewards_train/margins_1": 1.0017704963684082, "rewards_train/margins_2": 1.9735737442970276, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -135.7181854248047, "logps_train/policy_1_l": -90.44117736816406, "logps_train/policy_1_w": -62.2225227355957, "logps_train/policy_2_2": -103.21770477294922, "logps_train/policy_2_w": -88.20226287841797, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 2.089118719100952, "rewards_train/1-l": -1.006812334060669, "rewards_train/1-w": 2.4629039764404297, "rewards_train/2-2": 2.868854522705078, "rewards_train/2-w": 1.7258679866790771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4697163105010986, "rewards_train/margins_1": 0.37378525733947754, "rewards_train/margins_2": 1.142986536026001, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -133.712890625, "logps_train/policy_1_l": -102.53575134277344, "logps_train/policy_1_w": -76.47598266601562, "logps_train/policy_2_2": -114.8831787109375, "logps_train/policy_2_w": -94.09544372558594, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 0.8185549974441528, "rewards_train/1-l": -0.9061145782470703, "rewards_train/1-w": 1.7195895910263062, "rewards_train/2-2": 1.371252179145813, "rewards_train/2-w": 1.1310808658599854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6257041692733765, "rewards_train/margins_1": 0.9010345935821533, "rewards_train/margins_2": 0.24017131328582764, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -130.1616973876953, "logps_train/policy_1_l": -153.39874267578125, "logps_train/policy_1_w": -102.09195709228516, "logps_train/policy_2_2": -101.85297393798828, "logps_train/policy_2_w": -143.00637817382812, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.536173701286316, "rewards_train/1-l": -2.2631173133850098, "rewards_train/1-w": 2.536116361618042, "rewards_train/2-2": 2.151421070098877, "rewards_train/2-w": 1.7860809564590454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.799233675003052, "rewards_train/margins_1": 0.9999426603317261, "rewards_train/margins_2": 0.36534011363983154, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -229.561767578125, "logps_train/policy_1_l": -317.54766845703125, "logps_train/policy_1_w": -253.48934936523438, "logps_train/policy_2_2": -178.7730712890625, "logps_train/policy_2_w": -331.23114013671875, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -288.0, "logps_train/ref_1_w": -312.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -360.0, "rewards_train/1-2": 1.6750727891921997, "rewards_train/1-l": -2.8297667503356934, "rewards_train/1-w": 5.835439682006836, "rewards_train/2-2": 3.5945677757263184, "rewards_train/2-w": 2.939385175704956, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.66520643234253, "rewards_train/margins_1": 4.160366892814636, "rewards_train/margins_2": 0.6551826000213623, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -136.36715698242188, "logps_train/policy_1_l": -119.01359558105469, "logps_train/policy_1_w": -91.946044921875, "logps_train/policy_2_2": -99.97330474853516, "logps_train/policy_2_w": -118.03199768066406, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.113479733467102, "rewards_train/1-l": -1.8429617881774902, "rewards_train/1-w": 3.006958484649658, "rewards_train/2-2": 2.0263023376464844, "rewards_train/2-w": 2.016331195831299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.849920272827148, "rewards_train/margins_1": 1.8934787511825562, "rewards_train/margins_2": 0.009971141815185547, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -114.82537841796875, "logps_train/policy_1_l": -136.2711639404297, "logps_train/policy_1_w": -78.46134185791016, "logps_train/policy_2_2": -80.9915771484375, "logps_train/policy_2_w": -103.35179138183594, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.5698058605194092, "rewards_train/1-l": -2.108414649963379, "rewards_train/1-w": 2.2064049243927, "rewards_train/2-2": 2.4914674758911133, "rewards_train/2-w": 1.397634506225586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.314819574356079, "rewards_train/margins_1": 0.636599063873291, "rewards_train/margins_2": 1.0938329696655273, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -162.26104736328125, "logps_train/policy_1_l": -159.76588439941406, "logps_train/policy_1_w": -147.79464721679688, "logps_train/policy_2_2": -127.25566101074219, "logps_train/policy_2_w": -185.80010986328125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.9363961219787598, "rewards_train/1-l": -2.371119976043701, "rewards_train/1-w": 3.631472110748291, "rewards_train/2-2": 3.163496255874634, "rewards_train/2-w": 2.291863203048706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.002592086791992, "rewards_train/margins_1": 1.6950759887695312, "rewards_train/margins_2": 0.8716330528259277, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -191.31707763671875, "logps_train/policy_1_l": -187.1136016845703, "logps_train/policy_1_w": -171.93081665039062, "logps_train/policy_2_2": -145.20541381835938, "logps_train/policy_2_w": -220.82598876953125, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.8870420455932617, "rewards_train/1-l": -1.6613603830337524, "rewards_train/1-w": 3.313559055328369, "rewards_train/2-2": 3.352895736694336, "rewards_train/2-w": 1.5412296056747437, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.974919438362122, "rewards_train/margins_1": 1.4265170097351074, "rewards_train/margins_2": 1.8116661310195923, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -135.00938415527344, "logps_train/policy_1_l": -113.324951171875, "logps_train/policy_1_w": -130.2139129638672, "logps_train/policy_2_2": -103.138916015625, "logps_train/policy_2_w": -165.55160522460938, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.5529674291610718, "rewards_train/1-l": -1.6389408111572266, "rewards_train/1-w": 3.642671585083008, "rewards_train/2-2": 2.7630624771118164, "rewards_train/2-w": 2.5292131900787354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.281612396240234, "rewards_train/margins_1": 2.089704155921936, "rewards_train/margins_2": 0.23384928703308105, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -215.93133544921875, "logps_train/policy_1_l": -134.54244995117188, "logps_train/policy_1_w": -128.14187622070312, "logps_train/policy_2_2": -171.3308563232422, "logps_train/policy_2_w": -164.62042236328125, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.9205377101898193, "rewards_train/1-l": -1.07025945186615, "rewards_train/1-w": 2.9272189140319824, "rewards_train/2-2": 2.669257879257202, "rewards_train/2-w": 1.7254588603973389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9974783658981323, "rewards_train/margins_1": 2.006681203842163, "rewards_train/margins_2": 0.9437990188598633, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -141.6291046142578, "logps_train/policy_1_l": -118.95161437988281, "logps_train/policy_1_w": -84.83067321777344, "logps_train/policy_2_2": -108.17499542236328, "logps_train/policy_2_w": -116.72474670410156, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.6464638710021973, "rewards_train/1-l": -1.2026809453964233, "rewards_train/1-w": 2.1980843544006348, "rewards_train/2-2": 2.616875648498535, "rewards_train/2-w": 1.1642440557479858, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.400765299797058, "rewards_train/margins_1": 0.5516204833984375, "rewards_train/margins_2": 1.4526315927505493, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -187.8422088623047, "logps_train/policy_1_l": -194.41712951660156, "logps_train/policy_1_w": -148.39163208007812, "logps_train/policy_2_2": -142.72726440429688, "logps_train/policy_2_w": -206.56881713867188, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.4364819526672363, "rewards_train/1-l": -2.031947612762451, "rewards_train/1-w": 3.067087173461914, "rewards_train/2-2": 3.3018836975097656, "rewards_train/2-w": 1.5337432622909546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.099034786224365, "rewards_train/margins_1": 1.6306052207946777, "rewards_train/margins_2": 1.768140435218811, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -83.22450256347656, "logps_train/policy_1_l": -124.27800750732422, "logps_train/policy_1_w": -86.88589477539062, "logps_train/policy_2_2": -61.58055877685547, "logps_train/policy_2_w": -109.37324523925781, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.975206732749939, "rewards_train/1-l": -1.51901113986969, "rewards_train/1-w": 2.8364102840423584, "rewards_train/2-2": 1.3448737859725952, "rewards_train/2-w": 1.7556447982788086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.355421423912048, "rewards_train/margins_1": 1.8612035512924194, "rewards_train/margins_2": -0.4107710123062134, "step": 341 }, { "epoch": 1.02, "learning_rate": 2.6234400645092576e-06, "loss": 0.6708, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -109.76422119140625, "logps_train/policy_1_l": -152.1138916015625, "logps_train/policy_1_w": -158.5595703125, "logps_train/policy_2_2": -83.64076232910156, "logps_train/policy_2_w": -204.23397827148438, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.4032652378082275, "rewards_train/1-l": -1.8410762548446655, "rewards_train/1-w": 2.809668779373169, "rewards_train/2-2": 2.1486189365386963, "rewards_train/2-w": 0.8156643509864807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6507450342178345, "rewards_train/margins_1": 1.4064035415649414, "rewards_train/margins_2": 1.3329545855522156, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -177.80076599121094, "logps_train/policy_1_l": -152.42274475097656, "logps_train/policy_1_w": -95.45441436767578, "logps_train/policy_2_2": -152.07603454589844, "logps_train/policy_2_w": -116.42981719970703, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.771485447883606, "rewards_train/1-l": -1.9603406190872192, "rewards_train/1-w": 2.4311206340789795, "rewards_train/2-2": 2.8384904861450195, "rewards_train/2-w": 2.142955780029297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.391461253166199, "rewards_train/margins_1": 0.6596351861953735, "rewards_train/margins_2": 0.6955347061157227, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -157.20144653320312, "logps_train/policy_1_l": -164.95590209960938, "logps_train/policy_1_w": -140.6087646484375, "logps_train/policy_2_2": -131.9158935546875, "logps_train/policy_2_w": -166.4188690185547, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9517302513122559, "rewards_train/1-l": -1.6909029483795166, "rewards_train/1-w": 2.5781869888305664, "rewards_train/2-2": 2.819347858428955, "rewards_train/2-w": 1.5924885272979736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.269089937210083, "rewards_train/margins_1": 0.6264567375183105, "rewards_train/margins_2": 1.2268593311309814, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -238.0794677734375, "logps_train/policy_1_l": -196.16822814941406, "logps_train/policy_1_w": -142.08815002441406, "logps_train/policy_2_2": -199.14544677734375, "logps_train/policy_2_w": -181.5006103515625, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.9561169147491455, "rewards_train/1-l": -3.1136980056762695, "rewards_train/1-w": 3.2798564434051514, "rewards_train/2-2": 3.8206112384796143, "rewards_train/2-w": 1.8577510118484497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.393554449081421, "rewards_train/margins_1": 1.3237395286560059, "rewards_train/margins_2": 1.9628602266311646, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -138.31375122070312, "logps_train/policy_1_l": -171.48910522460938, "logps_train/policy_1_w": -170.10281372070312, "logps_train/policy_2_2": -112.98382568359375, "logps_train/policy_2_w": -213.0309600830078, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.4162802696228027, "rewards_train/1-l": -2.163447856903076, "rewards_train/1-w": 2.3608131408691406, "rewards_train/2-2": 2.203570604324341, "rewards_train/2-w": 1.1437783241271973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.524260997772217, "rewards_train/margins_1": 0.9445328712463379, "rewards_train/margins_2": 1.0597922801971436, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -104.84095764160156, "logps_train/policy_1_l": -157.50527954101562, "logps_train/policy_1_w": -70.56716918945312, "logps_train/policy_2_2": -92.94927215576172, "logps_train/policy_2_w": -85.81411743164062, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.675279974937439, "rewards_train/1-l": -2.1587300300598145, "rewards_train/1-w": 1.8846895694732666, "rewards_train/2-2": 2.1011664867401123, "rewards_train/2-w": 1.3744478225708008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.043419599533081, "rewards_train/margins_1": 0.20940959453582764, "rewards_train/margins_2": 0.7267186641693115, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -119.75216674804688, "logps_train/policy_1_l": -234.83694458007812, "logps_train/policy_1_w": -146.5823974609375, "logps_train/policy_2_2": -97.81683349609375, "logps_train/policy_2_w": -176.13375854492188, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.8982203006744385, "rewards_train/1-l": -3.385648727416992, "rewards_train/1-w": 2.6308724880218506, "rewards_train/2-2": 2.4126524925231934, "rewards_train/2-w": 1.432718276977539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.016521215438843, "rewards_train/margins_1": 0.7326521873474121, "rewards_train/margins_2": 0.9799342155456543, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -193.3189697265625, "logps_train/policy_1_l": -159.66729736328125, "logps_train/policy_1_w": -183.32601928710938, "logps_train/policy_2_2": -160.18978881835938, "logps_train/policy_2_w": -211.9889678955078, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.147986888885498, "rewards_train/1-l": -1.4009099006652832, "rewards_train/1-w": 4.031461715698242, "rewards_train/2-2": 3.4429352283477783, "rewards_train/2-w": 2.9104785919189453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.432371616363525, "rewards_train/margins_1": 1.8834748268127441, "rewards_train/margins_2": 0.532456636428833, "step": 342 }, { "epoch": 1.03, "logps_train/policy_1_2": -104.12769317626953, "logps_train/policy_1_l": -124.55638122558594, "logps_train/policy_1_w": -108.03529357910156, "logps_train/policy_2_2": -86.916748046875, "logps_train/policy_2_w": -144.01560974121094, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.8923084735870361, "rewards_train/1-l": -1.5841538906097412, "rewards_train/1-w": 2.6605334281921387, "rewards_train/2-2": 1.3454340696334839, "rewards_train/2-w": 1.5011732578277588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.24468731880188, "rewards_train/margins_1": 1.7682249546051025, "rewards_train/margins_2": -0.1557391881942749, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -138.7784881591797, "logps_train/policy_1_l": -218.15322875976562, "logps_train/policy_1_w": -140.53353881835938, "logps_train/policy_2_2": -103.89109802246094, "logps_train/policy_2_w": -184.25274658203125, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.8940255641937256, "rewards_train/1-l": -3.097353935241699, "rewards_train/1-w": 3.0161783695220947, "rewards_train/2-2": 2.6296403408050537, "rewards_train/2-w": 1.4919137954711914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.113532304763794, "rewards_train/margins_1": 1.1221528053283691, "rewards_train/margins_2": 1.1377265453338623, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -114.13086700439453, "logps_train/policy_1_l": -123.35093688964844, "logps_train/policy_1_w": -92.6775894165039, "logps_train/policy_2_2": -84.49856567382812, "logps_train/policy_2_w": -128.19656372070312, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.350975751876831, "rewards_train/1-l": -1.5479841232299805, "rewards_train/1-w": 2.690053939819336, "rewards_train/2-2": 2.361081123352051, "rewards_train/2-w": 1.4811251163482666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.238038063049316, "rewards_train/margins_1": 1.3390781879425049, "rewards_train/margins_2": 0.8799560070037842, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -144.147216796875, "logps_train/policy_1_l": -147.46376037597656, "logps_train/policy_1_w": -128.21994018554688, "logps_train/policy_2_2": -110.61188507080078, "logps_train/policy_2_w": -152.23716735839844, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.5509029626846313, "rewards_train/1-l": -1.7260631322860718, "rewards_train/1-w": 1.6428508758544922, "rewards_train/2-2": 1.9106864929199219, "rewards_train/2-w": 0.5411273241043091, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.368914008140564, "rewards_train/margins_1": 1.0919479131698608, "rewards_train/margins_2": 1.3695591688156128, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -80.35619354248047, "logps_train/policy_1_l": -94.24510955810547, "logps_train/policy_1_w": -58.13422393798828, "logps_train/policy_2_2": -65.20250701904297, "logps_train/policy_2_w": -69.72027587890625, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 0.5620366930961609, "rewards_train/1-l": -1.2387688159942627, "rewards_train/1-w": 1.3883352279663086, "rewards_train/2-2": 0.9336560368537903, "rewards_train/2-w": 0.9365662932395935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6271040439605713, "rewards_train/margins_1": 0.8262985348701477, "rewards_train/margins_2": -0.0029102563858032227, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -180.64706420898438, "logps_train/policy_1_l": -228.19432067871094, "logps_train/policy_1_w": -189.9622802734375, "logps_train/policy_2_2": -147.9483642578125, "logps_train/policy_2_w": -232.62652587890625, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 2.422792434692383, "rewards_train/1-l": -1.7092764377593994, "rewards_train/1-w": 3.0100228786468506, "rewards_train/2-2": 3.43485164642334, "rewards_train/2-w": 1.4185974597930908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.71929931640625, "rewards_train/margins_1": 0.5872304439544678, "rewards_train/margins_2": 2.016254186630249, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -99.52104187011719, "logps_train/policy_1_l": -131.32798767089844, "logps_train/policy_1_w": -132.61630249023438, "logps_train/policy_2_2": -73.50714874267578, "logps_train/policy_2_w": -165.80435180664062, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.4428174495697021, "rewards_train/1-l": -2.008579969406128, "rewards_train/1-w": 3.0477454662323, "rewards_train/2-2": 2.088542938232422, "rewards_train/2-w": 2.186751365661621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.056325435638428, "rewards_train/margins_1": 1.6049280166625977, "rewards_train/margins_2": -0.09820842742919922, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -178.31301879882812, "logps_train/policy_1_l": -207.21255493164062, "logps_train/policy_1_w": -238.56716918945312, "logps_train/policy_2_2": -144.49209594726562, "logps_train/policy_2_w": -294.9711608886719, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -266.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -300.0, "rewards_train/1-2": 2.3499488830566406, "rewards_train/1-l": -2.5650062561035156, "rewards_train/1-w": 2.702658176422119, "rewards_train/2-2": 3.688291072845459, "rewards_train/2-w": 0.5778847932815552, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.267664432525635, "rewards_train/margins_1": 0.3527092933654785, "rewards_train/margins_2": 3.110406279563904, "step": 343 }, { "epoch": 1.03, "learning_rate": 2.598766511751545e-06, "loss": 0.5968, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -171.90676879882812, "logps_train/policy_1_l": -201.6748504638672, "logps_train/policy_1_w": -177.7119903564453, "logps_train/policy_2_2": -140.95558166503906, "logps_train/policy_2_w": -215.73025512695312, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.746824026107788, "rewards_train/1-l": -2.4674854278564453, "rewards_train/1-w": 4.028800964355469, "rewards_train/2-2": 2.8044424057006836, "rewards_train/2-w": 2.620723247528076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.496286392211914, "rewards_train/margins_1": 2.2819769382476807, "rewards_train/margins_2": 0.18371915817260742, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -171.7288055419922, "logps_train/policy_1_l": -131.07931518554688, "logps_train/policy_1_w": -126.92595672607422, "logps_train/policy_2_2": -141.8015899658203, "logps_train/policy_2_w": -158.98451232910156, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.8450887203216553, "rewards_train/1-l": -1.294454574584961, "rewards_train/1-w": 3.4167795181274414, "rewards_train/2-2": 2.815544605255127, "rewards_train/2-w": 2.0218610763549805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.711234092712402, "rewards_train/margins_1": 1.5716907978057861, "rewards_train/margins_2": 0.7936835289001465, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -123.26669311523438, "logps_train/policy_1_l": -134.66806030273438, "logps_train/policy_1_w": -94.28353881835938, "logps_train/policy_2_2": -86.22640991210938, "logps_train/policy_2_w": -139.564453125, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.985050618648529, "rewards_train/1-l": -1.67989182472229, "rewards_train/1-w": 2.4021639823913574, "rewards_train/2-2": 2.121890068054199, "rewards_train/2-w": 0.9998059272766113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0820558071136475, "rewards_train/margins_1": 1.4171133637428284, "rewards_train/margins_2": 1.122084140777588, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -180.28192138671875, "logps_train/policy_1_l": -205.554931640625, "logps_train/policy_1_w": -104.73539733886719, "logps_train/policy_2_2": -149.96951293945312, "logps_train/policy_2_w": -125.18122863769531, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.478057622909546, "rewards_train/1-l": -2.55197811126709, "rewards_train/1-w": 2.4088821411132812, "rewards_train/2-2": 2.459299087524414, "rewards_train/2-w": 1.9730887413024902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.960860252380371, "rewards_train/margins_1": 0.9308245182037354, "rewards_train/margins_2": 0.48621034622192383, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -98.75759887695312, "logps_train/policy_1_l": -75.47373962402344, "logps_train/policy_1_w": -42.21628189086914, "logps_train/policy_2_2": -82.63240051269531, "logps_train/policy_2_w": -50.58003234863281, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -63.0, "logps_train/ref_1_w": -59.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -65.0, "rewards_train/1-2": 1.6242399215698242, "rewards_train/1-l": -1.2487409114837646, "rewards_train/1-w": 1.7264187335968018, "rewards_train/2-2": 2.236759901046753, "rewards_train/2-w": 1.4363327026367188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9751596450805664, "rewards_train/margins_1": 0.10217881202697754, "rewards_train/margins_2": 0.8004271984100342, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -131.5371551513672, "logps_train/policy_1_l": -221.94595336914062, "logps_train/policy_1_w": -95.0005874633789, "logps_train/policy_2_2": -98.75184631347656, "logps_train/policy_2_w": -128.4005126953125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.9978467226028442, "rewards_train/1-l": -3.181704044342041, "rewards_train/1-w": 2.6147847175598145, "rewards_train/2-2": 2.078721761703491, "rewards_train/2-w": 1.4380738735198975, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.7964887619018555, "rewards_train/margins_1": 1.6169379949569702, "rewards_train/margins_2": 0.6406478881835938, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -162.73512268066406, "logps_train/policy_1_l": -276.8448486328125, "logps_train/policy_1_w": -116.81206512451172, "logps_train/policy_2_2": -128.0781707763672, "logps_train/policy_2_w": -153.7545928955078, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.6764881610870361, "rewards_train/1-l": -4.0321431159973145, "rewards_train/1-w": 3.3094189167022705, "rewards_train/2-2": 2.7328083515167236, "rewards_train/2-w": 2.135087728500366, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.341562032699585, "rewards_train/margins_1": 1.6329307556152344, "rewards_train/margins_2": 0.5977206230163574, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -200.9227752685547, "logps_train/policy_1_l": -220.89907836914062, "logps_train/policy_1_w": -123.61885833740234, "logps_train/policy_2_2": -158.61537170410156, "logps_train/policy_2_w": -165.40322875976562, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.50381600856781, "rewards_train/1-l": -3.034830093383789, "rewards_train/1-w": 3.316239356994629, "rewards_train/2-2": 3.0556511878967285, "rewards_train/2-w": 2.362802743911743, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.351069450378418, "rewards_train/margins_1": 1.8124233484268188, "rewards_train/margins_2": 0.6928484439849854, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -160.36968994140625, "logps_train/policy_1_l": -173.3120880126953, "logps_train/policy_1_w": -109.76387023925781, "logps_train/policy_2_2": -122.86253356933594, "logps_train/policy_2_w": -143.66238403320312, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.4817802906036377, "rewards_train/1-l": -2.0026988983154297, "rewards_train/1-w": 3.151738405227661, "rewards_train/2-2": 2.688746929168701, "rewards_train/2-w": 2.2103233337402344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.154437303543091, "rewards_train/margins_1": 1.6699581146240234, "rewards_train/margins_2": 0.4784235954284668, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -151.93869018554688, "logps_train/policy_1_l": -238.63839721679688, "logps_train/policy_1_w": -167.76824951171875, "logps_train/policy_2_2": -124.77193450927734, "logps_train/policy_2_w": -210.40570068359375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.7280070781707764, "rewards_train/1-l": -2.6782939434051514, "rewards_train/1-w": 2.9606740474700928, "rewards_train/2-2": 2.3618693351745605, "rewards_train/2-w": 1.2055234909057617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.638967990875244, "rewards_train/margins_1": 1.2326669692993164, "rewards_train/margins_2": 1.1563458442687988, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -141.32691955566406, "logps_train/policy_1_l": -134.7078094482422, "logps_train/policy_1_w": -117.88018035888672, "logps_train/policy_2_2": -108.52318572998047, "logps_train/policy_2_w": -154.9376220703125, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.3798083066940308, "rewards_train/1-l": -1.8391414880752563, "rewards_train/1-w": 3.3416695594787598, "rewards_train/2-2": 2.572681427001953, "rewards_train/2-w": 1.6781129837036133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.180811047554016, "rewards_train/margins_1": 1.961861252784729, "rewards_train/margins_2": 0.8945684432983398, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -104.63023376464844, "logps_train/policy_1_l": -95.95263671875, "logps_train/policy_1_w": -95.23766326904297, "logps_train/policy_2_2": -77.28041076660156, "logps_train/policy_2_w": -125.63191986083984, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 2.2744760513305664, "rewards_train/1-l": -1.393310546875, "rewards_train/1-w": 2.6918585300445557, "rewards_train/2-2": 2.9938344955444336, "rewards_train/2-w": 1.3430585861206055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.085169076919556, "rewards_train/margins_1": 0.41738247871398926, "rewards_train/margins_2": 1.6507759094238281, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -286.18157958984375, "logps_train/policy_1_l": -280.3109436035156, "logps_train/policy_1_w": -186.08627319335938, "logps_train/policy_2_2": -239.287353515625, "logps_train/policy_2_w": -230.43443298339844, "logps_train/ref_1_2": -312.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -288.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.691215991973877, "rewards_train/1-l": -2.7178127765655518, "rewards_train/1-w": 3.3038740158081055, "rewards_train/2-2": 4.718141078948975, "rewards_train/2-w": 2.2315573692321777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.021686792373657, "rewards_train/margins_1": 0.6126580238342285, "rewards_train/margins_2": 2.486583709716797, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -101.29444885253906, "logps_train/policy_1_l": -127.580078125, "logps_train/policy_1_w": -125.03546142578125, "logps_train/policy_2_2": -85.18226623535156, "logps_train/policy_2_w": -153.7109832763672, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.934618353843689, "rewards_train/1-l": -2.443359136581421, "rewards_train/1-w": 2.337859630584717, "rewards_train/2-2": 2.4036478996276855, "rewards_train/2-w": 1.3765573501586914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.781218767166138, "rewards_train/margins_1": 0.40324127674102783, "rewards_train/margins_2": 1.0270905494689941, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -138.65621948242188, "logps_train/policy_1_l": -120.28741455078125, "logps_train/policy_1_w": -136.64190673828125, "logps_train/policy_2_2": -107.00025939941406, "logps_train/policy_2_w": -185.81716918945312, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.877346396446228, "rewards_train/1-l": -1.3375787734985352, "rewards_train/1-w": 2.0725271701812744, "rewards_train/2-2": 1.605443000793457, "rewards_train/2-w": 0.6354702711105347, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4101059436798096, "rewards_train/margins_1": 1.1951807737350464, "rewards_train/margins_2": 0.9699727296829224, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -84.21805572509766, "logps_train/policy_1_l": -106.64631652832031, "logps_train/policy_1_w": -54.01852035522461, "logps_train/policy_2_2": -61.11932373046875, "logps_train/policy_2_w": -74.12407684326172, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 1.1562215089797974, "rewards_train/1-l": -1.7872881889343262, "rewards_train/1-w": 1.9980504512786865, "rewards_train/2-2": 1.853790283203125, "rewards_train/2-w": 1.340033769607544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7853386402130127, "rewards_train/margins_1": 0.8418289422988892, "rewards_train/margins_2": 0.513756513595581, "step": 345 }, { "epoch": 1.04, "learning_rate": 2.5740833195563996e-06, "loss": 0.5148, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -82.96875762939453, "logps_train/policy_1_l": -121.60842895507812, "logps_train/policy_1_w": -94.75578308105469, "logps_train/policy_2_2": -64.32766723632812, "logps_train/policy_2_w": -134.78565979003906, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.97665935754776, "rewards_train/1-l": -2.161916971206665, "rewards_train/1-w": 2.3111400604248047, "rewards_train/2-2": 1.4217195510864258, "rewards_train/2-w": 0.8917466402053833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.47305703163147, "rewards_train/margins_1": 1.3344807028770447, "rewards_train/margins_2": 0.5299729108810425, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -206.34970092773438, "logps_train/policy_1_l": -167.69700622558594, "logps_train/policy_1_w": -130.05386352539062, "logps_train/policy_2_2": -164.47708129882812, "logps_train/policy_2_w": -162.97633361816406, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.383780598640442, "rewards_train/1-l": -2.1398184299468994, "rewards_train/1-w": 3.5810396671295166, "rewards_train/2-2": 3.296041965484619, "rewards_train/2-w": 2.3679916858673096, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.720858097076416, "rewards_train/margins_1": 2.1972590684890747, "rewards_train/margins_2": 0.9280502796173096, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -159.60177612304688, "logps_train/policy_1_l": -146.66046142578125, "logps_train/policy_1_w": -108.19475555419922, "logps_train/policy_2_2": -100.27356719970703, "logps_train/policy_2_w": -147.22500610351562, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.0226353406906128, "rewards_train/1-l": -1.7945631742477417, "rewards_train/1-w": 2.7070860862731934, "rewards_train/2-2": 2.5726428031921387, "rewards_train/2-w": 1.4993748664855957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.501649260520935, "rewards_train/margins_1": 1.6844507455825806, "rewards_train/margins_2": 1.073267936706543, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -182.12242126464844, "logps_train/policy_1_l": -197.67076110839844, "logps_train/policy_1_w": -142.45306396484375, "logps_train/policy_2_2": -153.01341247558594, "logps_train/policy_2_w": -190.379150390625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7565083503723145, "rewards_train/1-l": -3.061997413635254, "rewards_train/1-w": 3.4695374965667725, "rewards_train/2-2": 2.4127209186553955, "rewards_train/2-w": 1.9339592456817627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.531534910202026, "rewards_train/margins_1": 1.713029146194458, "rewards_train/margins_2": 0.4787616729736328, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -127.5396728515625, "logps_train/policy_1_l": -107.47276306152344, "logps_train/policy_1_w": -116.31969451904297, "logps_train/policy_2_2": -94.74436950683594, "logps_train/policy_2_w": -148.54177856445312, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.371032953262329, "rewards_train/1-l": -1.3427839279174805, "rewards_train/1-w": 2.5039687156677246, "rewards_train/2-2": 2.5896260738372803, "rewards_train/2-w": 0.9520719051361084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.846752643585205, "rewards_train/margins_1": 1.1329357624053955, "rewards_train/margins_2": 1.6375541687011719, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -126.86590576171875, "logps_train/policy_1_l": -113.01933288574219, "logps_train/policy_1_w": -74.3282241821289, "logps_train/policy_2_2": -98.40859985351562, "logps_train/policy_2_w": -102.94266510009766, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": 0.427080899477005, "rewards_train/1-l": -2.5999794006347656, "rewards_train/1-w": 2.405458927154541, "rewards_train/2-2": 1.7944921255111694, "rewards_train/2-w": 1.4815149307250977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.005438327789307, "rewards_train/margins_1": 1.978378027677536, "rewards_train/margins_2": 0.3129771947860718, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -130.02243041992188, "logps_train/policy_1_l": -95.31983947753906, "logps_train/policy_1_w": -92.4397964477539, "logps_train/policy_2_2": -106.86358642578125, "logps_train/policy_2_w": -116.55671691894531, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.471585988998413, "rewards_train/1-l": -1.1655774116516113, "rewards_train/1-w": 2.463637590408325, "rewards_train/2-2": 2.312371253967285, "rewards_train/2-w": 1.4972578287124634, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6292150020599365, "rewards_train/margins_1": 0.9920516014099121, "rewards_train/margins_2": 0.8151134252548218, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -76.28856658935547, "logps_train/policy_1_l": -51.33926773071289, "logps_train/policy_1_w": -60.97895812988281, "logps_train/policy_2_2": -59.20170593261719, "logps_train/policy_2_w": -90.749755859375, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.2711430788040161, "rewards_train/1-l": -0.9423253536224365, "rewards_train/1-w": 1.800541877746582, "rewards_train/2-2": 1.6868607997894287, "rewards_train/2-w": 1.008227825164795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.7428672313690186, "rewards_train/margins_1": 0.5293987989425659, "rewards_train/margins_2": 0.6786329746246338, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -142.65798950195312, "logps_train/policy_1_l": -117.11750793457031, "logps_train/policy_1_w": -85.7017822265625, "logps_train/policy_2_2": -100.54438781738281, "logps_train/policy_2_w": -119.14396667480469, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.3248260021209717, "rewards_train/1-l": -2.1769847869873047, "rewards_train/1-w": 2.6212286949157715, "rewards_train/2-2": 3.006498336791992, "rewards_train/2-w": 1.550837755203247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.798213481903076, "rewards_train/margins_1": 1.2964026927947998, "rewards_train/margins_2": 1.4556605815887451, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -131.16262817382812, "logps_train/policy_1_l": -240.2507781982422, "logps_train/policy_1_w": -119.9426040649414, "logps_train/policy_2_2": -104.28689575195312, "logps_train/policy_2_w": -168.82333374023438, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9032678604125977, "rewards_train/1-l": -3.043045997619629, "rewards_train/1-w": 2.8104264736175537, "rewards_train/2-2": 2.542795181274414, "rewards_train/2-w": 1.250870943069458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.853472471237183, "rewards_train/margins_1": 0.907158613204956, "rewards_train/margins_2": 1.291924238204956, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -149.60433959960938, "logps_train/policy_1_l": -196.30093383789062, "logps_train/policy_1_w": -98.152587890625, "logps_train/policy_2_2": -115.03672790527344, "logps_train/policy_2_w": -129.98736572265625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.0426914691925049, "rewards_train/1-l": -3.037808895111084, "rewards_train/1-w": 2.4230217933654785, "rewards_train/2-2": 2.219764232635498, "rewards_train/2-w": 1.5348560810089111, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4608306884765625, "rewards_train/margins_1": 1.3803303241729736, "rewards_train/margins_2": 0.6849081516265869, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -47.6738166809082, "logps_train/policy_1_l": -55.7819709777832, "logps_train/policy_1_w": -53.47454833984375, "logps_train/policy_2_2": -35.56967544555664, "logps_train/policy_2_w": -67.24662780761719, "logps_train/ref_1_2": -59.0, "logps_train/ref_1_l": -44.25, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -49.0, "logps_train/ref_2_w": -81.0, "rewards_train/1-2": 1.141211986541748, "rewards_train/1-l": -1.1387439966201782, "rewards_train/1-w": 1.8025453090667725, "rewards_train/2-2": 1.3453763723373413, "rewards_train/2-w": 1.3640086650848389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 2.9412893056869507, "rewards_train/margins_1": 0.6613333225250244, "rewards_train/margins_2": -0.01863229274749756, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -122.47975158691406, "logps_train/policy_1_l": -184.49197387695312, "logps_train/policy_1_w": -93.40658569335938, "logps_train/policy_2_2": -96.64117431640625, "logps_train/policy_2_w": -115.53036499023438, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.6504629850387573, "rewards_train/1-l": -2.344815969467163, "rewards_train/1-w": 2.5394186973571777, "rewards_train/2-2": 2.5202574729919434, "rewards_train/2-w": 1.81766676902771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.884234666824341, "rewards_train/margins_1": 0.8889557123184204, "rewards_train/margins_2": 0.7025907039642334, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -128.01336669921875, "logps_train/policy_1_l": -153.03494262695312, "logps_train/policy_1_w": -88.1412353515625, "logps_train/policy_2_2": -97.09810638427734, "logps_train/policy_2_w": -118.150146484375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 0.9892891645431519, "rewards_train/1-l": -2.2054474353790283, "rewards_train/1-w": 3.042126178741455, "rewards_train/2-2": 2.2339394092559814, "rewards_train/2-w": 2.1068596839904785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.247573614120483, "rewards_train/margins_1": 2.0528370141983032, "rewards_train/margins_2": 0.12707972526550293, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -180.66049194335938, "logps_train/policy_1_l": -244.84677124023438, "logps_train/policy_1_w": -131.37657165527344, "logps_train/policy_2_2": -133.43621826171875, "logps_train/policy_2_w": -175.21511840820312, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.7120754718780518, "rewards_train/1-l": -3.3432717323303223, "rewards_train/1-w": 3.0967180728912354, "rewards_train/2-2": 3.284503936767578, "rewards_train/2-w": 1.687863826751709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.439989805221558, "rewards_train/margins_1": 1.3846426010131836, "rewards_train/margins_2": 1.5966401100158691, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -137.44113159179688, "logps_train/policy_1_l": -104.08731079101562, "logps_train/policy_1_w": -58.84273910522461, "logps_train/policy_2_2": -101.05341339111328, "logps_train/policy_2_w": -82.2509765625, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.9965128302574158, "rewards_train/1-l": -1.0687897205352783, "rewards_train/1-w": 1.4235384464263916, "rewards_train/2-2": 2.2970025539398193, "rewards_train/2-w": 0.7018551826477051, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.49232816696167, "rewards_train/margins_1": 0.42702561616897583, "rewards_train/margins_2": 1.5951473712921143, "step": 347 }, { "epoch": 1.04, "learning_rate": 2.5493928969598664e-06, "loss": 0.5812, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -157.67520141601562, "logps_train/policy_1_l": -133.49769592285156, "logps_train/policy_1_w": -106.84881591796875, "logps_train/policy_2_2": -129.84133911132812, "logps_train/policy_2_w": -126.9480972290039, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.3344330787658691, "rewards_train/1-l": -1.4566056728363037, "rewards_train/1-w": 2.5509581565856934, "rewards_train/2-2": 2.537350654602051, "rewards_train/2-w": 1.7735497951507568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.007563829421997, "rewards_train/margins_1": 1.2165250778198242, "rewards_train/margins_2": 0.763800859451294, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -159.59963989257812, "logps_train/policy_1_l": -204.9727783203125, "logps_train/policy_1_w": -110.68555450439453, "logps_train/policy_2_2": -130.8875274658203, "logps_train/policy_2_w": -146.66827392578125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.691598653793335, "rewards_train/1-l": -3.3699347972869873, "rewards_train/1-w": 2.4705073833465576, "rewards_train/2-2": 2.8846852779388428, "rewards_train/2-w": 1.234734296798706, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.840442180633545, "rewards_train/margins_1": 0.7789087295532227, "rewards_train/margins_2": 1.6499509811401367, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -162.5284423828125, "logps_train/policy_1_l": -225.58489990234375, "logps_train/policy_1_w": -109.25589752197266, "logps_train/policy_2_2": -129.716064453125, "logps_train/policy_2_w": -138.01895141601562, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.3616089820861816, "rewards_train/1-l": -3.172943353652954, "rewards_train/1-w": 2.6994102001190186, "rewards_train/2-2": 2.5705809593200684, "rewards_train/2-w": 2.003965377807617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.872353553771973, "rewards_train/margins_1": 1.337801218032837, "rewards_train/margins_2": 0.5666155815124512, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -122.36858367919922, "logps_train/policy_1_l": -159.4119873046875, "logps_train/policy_1_w": -98.85884094238281, "logps_train/policy_2_2": -98.99237060546875, "logps_train/policy_2_w": -129.6021728515625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.6725165843963623, "rewards_train/1-l": -2.714634895324707, "rewards_train/1-w": 2.635209560394287, "rewards_train/2-2": 2.40388822555542, "rewards_train/2-w": 1.2522825002670288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.349844455718994, "rewards_train/margins_1": 0.9626929759979248, "rewards_train/margins_2": 1.1516057252883911, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -161.59640502929688, "logps_train/policy_1_l": -214.1259765625, "logps_train/policy_1_w": -125.71656799316406, "logps_train/policy_2_2": -126.06419372558594, "logps_train/policy_2_w": -170.033203125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.8966097831726074, "rewards_train/1-l": -3.30322265625, "rewards_train/1-w": 3.167405128479004, "rewards_train/2-2": 2.874830722808838, "rewards_train/2-w": 1.856834888458252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.470627784729004, "rewards_train/margins_1": 1.2707953453063965, "rewards_train/margins_2": 1.017995834350586, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -156.62109375, "logps_train/policy_1_l": -180.74468994140625, "logps_train/policy_1_w": -148.0130615234375, "logps_train/policy_2_2": -125.70121765136719, "logps_train/policy_2_w": -198.63502502441406, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.4003899097442627, "rewards_train/1-l": -2.468219518661499, "rewards_train/1-w": 3.2330679893493652, "rewards_train/2-2": 3.2580037117004395, "rewards_train/2-w": 1.2505607604980469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.701287508010864, "rewards_train/margins_1": 0.8326780796051025, "rewards_train/margins_2": 2.0074429512023926, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -120.78927612304688, "logps_train/policy_1_l": -134.61276245117188, "logps_train/policy_1_w": -128.38955688476562, "logps_train/policy_2_2": -93.43505859375, "logps_train/policy_2_w": -167.56204223632812, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0460717678070068, "rewards_train/1-l": -2.672605037689209, "rewards_train/1-w": 2.878232002258301, "rewards_train/2-2": 1.9533684253692627, "rewards_train/2-w": 1.4703571796417236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.55083703994751, "rewards_train/margins_1": 1.832160234451294, "rewards_train/margins_2": 0.48301124572753906, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -164.1857147216797, "logps_train/policy_1_l": -259.76495361328125, "logps_train/policy_1_w": -118.77875518798828, "logps_train/policy_2_2": -124.32791137695312, "logps_train/policy_2_w": -177.93768310546875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2126784324645996, "rewards_train/1-l": -4.839093208312988, "rewards_train/1-w": 3.6283745765686035, "rewards_train/2-2": 2.79533314704895, "rewards_train/2-w": 1.618732213973999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 8.467467784881592, "rewards_train/margins_1": 2.415696144104004, "rewards_train/margins_2": 1.1766009330749512, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -159.9591827392578, "logps_train/policy_1_l": -194.46041870117188, "logps_train/policy_1_w": -107.13645935058594, "logps_train/policy_2_2": -127.47325134277344, "logps_train/policy_2_w": -144.863525390625, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 2.466580867767334, "rewards_train/1-l": -2.6481897830963135, "rewards_train/1-w": 3.0894784927368164, "rewards_train/2-2": 3.4995498657226562, "rewards_train/2-w": 1.9917713403701782, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.73766827583313, "rewards_train/margins_1": 0.6228976249694824, "rewards_train/margins_2": 1.507778525352478, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -80.611328125, "logps_train/policy_1_l": -97.66726684570312, "logps_train/policy_1_w": -77.7833480834961, "logps_train/policy_2_2": -68.6238784790039, "logps_train/policy_2_w": -93.83766174316406, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.0919914245605469, "rewards_train/1-l": -1.7942652702331543, "rewards_train/1-w": 1.4941260814666748, "rewards_train/2-2": 1.5555802583694458, "rewards_train/2-w": 0.6904520392417908, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.288391351699829, "rewards_train/margins_1": 0.40213465690612793, "rewards_train/margins_2": 0.865128219127655, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -190.774658203125, "logps_train/policy_1_l": -205.66357421875, "logps_train/policy_1_w": -103.9229507446289, "logps_train/policy_2_2": -156.41268920898438, "logps_train/policy_2_w": -136.935791015625, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.7350354194641113, "rewards_train/1-l": -2.938232898712158, "rewards_train/1-w": 2.9014551639556885, "rewards_train/2-2": 2.8962302207946777, "rewards_train/2-w": 1.9251714944839478, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.839688062667847, "rewards_train/margins_1": 1.1664197444915771, "rewards_train/margins_2": 0.97105872631073, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -49.04728698730469, "logps_train/policy_1_l": -29.465700149536133, "logps_train/policy_1_w": -55.80613327026367, "logps_train/policy_2_2": -38.927162170410156, "logps_train/policy_2_w": -76.25390625, "logps_train/ref_1_2": -58.0, "logps_train/ref_1_l": -25.75, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -52.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.8901931047439575, "rewards_train/1-l": -0.3618044853210449, "rewards_train/1-w": 2.346144437789917, "rewards_train/2-2": 1.3699791431427002, "rewards_train/2-w": 1.5816402435302734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.707948923110962, "rewards_train/margins_1": 1.4559513330459595, "rewards_train/margins_2": -0.21166110038757324, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -183.07745361328125, "logps_train/policy_1_l": -208.05958557128906, "logps_train/policy_1_w": -159.89410400390625, "logps_train/policy_2_2": -145.13592529296875, "logps_train/policy_2_w": -192.55258178710938, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.5750670433044434, "rewards_train/1-l": -3.4099628925323486, "rewards_train/1-w": 2.6973090171813965, "rewards_train/2-2": 2.895782947540283, "rewards_train/2-w": 1.158022403717041, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.107271909713745, "rewards_train/margins_1": 1.1222419738769531, "rewards_train/margins_2": 1.7377605438232422, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -208.1865234375, "logps_train/policy_1_l": -159.87628173828125, "logps_train/policy_1_w": -150.93177795410156, "logps_train/policy_2_2": -168.31996154785156, "logps_train/policy_2_w": -201.2233428955078, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.905566930770874, "rewards_train/1-l": -1.927472472190857, "rewards_train/1-w": 2.8497915267944336, "rewards_train/2-2": 3.0922226905822754, "rewards_train/2-w": 0.41204020380973816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.7772639989852905, "rewards_train/margins_1": 0.9442245960235596, "rewards_train/margins_2": 2.6801824867725372, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -111.40498352050781, "logps_train/policy_1_l": -190.29107666015625, "logps_train/policy_1_w": -143.34349060058594, "logps_train/policy_2_2": -88.58334350585938, "logps_train/policy_2_w": -170.6149139404297, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.9348926544189453, "rewards_train/1-l": -2.668168783187866, "rewards_train/1-w": 2.590651750564575, "rewards_train/2-2": 2.526041030883789, "rewards_train/2-w": 1.8197591304779053, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.258820533752441, "rewards_train/margins_1": 0.6557590961456299, "rewards_train/margins_2": 0.7062819004058838, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -101.7403564453125, "logps_train/policy_1_l": -157.30459594726562, "logps_train/policy_1_w": -81.20574951171875, "logps_train/policy_2_2": -76.85957336425781, "logps_train/policy_2_w": -116.093505859375, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.1881722211837769, "rewards_train/1-l": -2.7116122245788574, "rewards_train/1-w": 2.734893798828125, "rewards_train/2-2": 1.9498587846755981, "rewards_train/2-w": 1.7242428064346313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.446506023406982, "rewards_train/margins_1": 1.5467215776443481, "rewards_train/margins_2": 0.2256159782409668, "step": 349 }, { "epoch": 1.05, "learning_rate": 2.5246976537036646e-06, "loss": 0.5306, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -183.65872192382812, "logps_train/policy_1_l": -147.11474609375, "logps_train/policy_1_w": -150.24114990234375, "logps_train/policy_2_2": -153.7886199951172, "logps_train/policy_2_w": -190.29031372070312, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.809127688407898, "rewards_train/1-l": -1.2486076354980469, "rewards_train/1-w": 2.5575003623962402, "rewards_train/2-2": 2.9992640018463135, "rewards_train/2-w": 0.9608126282691956, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.806107997894287, "rewards_train/margins_1": 0.7483726739883423, "rewards_train/margins_2": 2.038451373577118, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -156.51034545898438, "logps_train/policy_1_l": -183.74485778808594, "logps_train/policy_1_w": -102.06126403808594, "logps_train/policy_2_2": -128.05746459960938, "logps_train/policy_2_w": -124.28897094726562, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 2.12162184715271, "rewards_train/1-l": -1.8416739702224731, "rewards_train/1-w": 2.8505148887634277, "rewards_train/2-2": 2.981752872467041, "rewards_train/2-w": 2.1039156913757324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.692188858985901, "rewards_train/margins_1": 0.7288930416107178, "rewards_train/margins_2": 0.8778371810913086, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -171.39239501953125, "logps_train/policy_1_l": -174.34735107421875, "logps_train/policy_1_w": -75.99542236328125, "logps_train/policy_2_2": -141.75413513183594, "logps_train/policy_2_w": -95.42603302001953, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.6455259323120117, "rewards_train/1-l": -2.06754732131958, "rewards_train/1-w": 2.655144691467285, "rewards_train/2-2": 2.794508934020996, "rewards_train/2-w": 2.057396411895752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.722692012786865, "rewards_train/margins_1": 1.0096187591552734, "rewards_train/margins_2": 0.7371125221252441, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -95.76734924316406, "logps_train/policy_1_l": -137.71795654296875, "logps_train/policy_1_w": -111.8326416015625, "logps_train/policy_2_2": -74.31954956054688, "logps_train/policy_2_w": -156.22885131835938, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.7654528617858887, "rewards_train/1-l": -2.107341766357422, "rewards_train/1-w": 2.9948606491088867, "rewards_train/2-2": 2.127419948577881, "rewards_train/2-w": 1.1521155834197998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.102202415466309, "rewards_train/margins_1": 1.229407787322998, "rewards_train/margins_2": 0.975304365158081, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -116.54078674316406, "logps_train/policy_1_l": -137.15835571289062, "logps_train/policy_1_w": -123.7237548828125, "logps_train/policy_2_2": -86.19633483886719, "logps_train/policy_2_w": -152.7112274169922, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.9068098068237305, "rewards_train/1-l": -1.7879557609558105, "rewards_train/1-w": 2.6666879653930664, "rewards_train/2-2": 2.941255807876587, "rewards_train/2-w": 1.4991899728775024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.454643726348877, "rewards_train/margins_1": 0.7598781585693359, "rewards_train/margins_2": 1.4420658349990845, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -108.55987548828125, "logps_train/policy_1_l": -74.8465576171875, "logps_train/policy_1_w": -61.59351348876953, "logps_train/policy_2_2": -88.8062744140625, "logps_train/policy_2_w": -75.47384643554688, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -59.25, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.8272163271903992, "rewards_train/1-l": -1.5457885265350342, "rewards_train/1-w": 1.4742426872253418, "rewards_train/2-2": 1.864099383354187, "rewards_train/2-w": 1.0604275465011597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.020031213760376, "rewards_train/margins_1": 0.6470263600349426, "rewards_train/margins_2": 0.8036718368530273, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -91.50462341308594, "logps_train/policy_1_l": -232.32339477539062, "logps_train/policy_1_w": -61.953651428222656, "logps_train/policy_2_2": -71.94296264648438, "logps_train/policy_2_w": -88.13835144042969, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 0.643677830696106, "rewards_train/1-l": -4.2710113525390625, "rewards_train/1-w": 1.9924278259277344, "rewards_train/2-2": 1.4217197895050049, "rewards_train/2-w": 1.3088206052780151, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.263439178466797, "rewards_train/margins_1": 1.3487499952316284, "rewards_train/margins_2": 0.11289918422698975, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -226.549560546875, "logps_train/policy_1_l": -239.27859497070312, "logps_train/policy_1_w": -156.39727783203125, "logps_train/policy_2_2": -180.22018432617188, "logps_train/policy_2_w": -208.969482421875, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.287231206893921, "rewards_train/1-l": -3.7278594970703125, "rewards_train/1-w": 3.3118348121643066, "rewards_train/2-2": 3.210012197494507, "rewards_train/2-w": 1.4811768531799316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.039694309234619, "rewards_train/margins_1": 2.0246036052703857, "rewards_train/margins_2": 1.7288353443145752, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -270.4258117675781, "logps_train/policy_1_l": -309.1871337890625, "logps_train/policy_1_w": -170.97991943359375, "logps_train/policy_2_2": -225.20062255859375, "logps_train/policy_2_w": -211.13572692871094, "logps_train/ref_1_2": -290.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.968454122543335, "rewards_train/1-l": -4.549965858459473, "rewards_train/1-w": 3.3344292640686035, "rewards_train/2-2": 3.5392158031463623, "rewards_train/2-w": 1.6165047883987427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.884395122528076, "rewards_train/margins_1": 1.3659751415252686, "rewards_train/margins_2": 1.9227110147476196, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -253.56626892089844, "logps_train/policy_1_l": -303.44964599609375, "logps_train/policy_1_w": -224.40724182128906, "logps_train/policy_2_2": -198.9219207763672, "logps_train/policy_2_w": -284.0198974609375, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -270.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 1.9433722496032715, "rewards_train/1-l": -2.932466983795166, "rewards_train/1-w": 4.6217756271362305, "rewards_train/2-2": 3.745306968688965, "rewards_train/2-w": 2.14800763130188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.5542426109313965, "rewards_train/margins_1": 2.678403377532959, "rewards_train/margins_2": 1.597299337387085, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -144.717041015625, "logps_train/policy_1_l": -218.38104248046875, "logps_train/policy_1_w": -113.14198303222656, "logps_train/policy_2_2": -109.80402374267578, "logps_train/policy_2_w": -151.70751953125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.8009531497955322, "rewards_train/1-l": -3.508026123046875, "rewards_train/1-w": 2.7998642921447754, "rewards_train/2-2": 2.907097578048706, "rewards_train/2-w": 1.842529535293579, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.30789041519165, "rewards_train/margins_1": 0.9989111423492432, "rewards_train/margins_2": 1.064568042755127, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -35.94211959838867, "logps_train/policy_1_l": -24.06940460205078, "logps_train/policy_1_w": -61.70779037475586, "logps_train/policy_2_2": -23.371084213256836, "logps_train/policy_2_w": -93.4848403930664, "logps_train/ref_1_2": -42.25, "logps_train/ref_1_l": -22.875, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -32.5, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 0.6393818855285645, "rewards_train/1-l": -0.12207723408937454, "rewards_train/1-w": 1.4421603679656982, "rewards_train/2-2": 0.923047661781311, "rewards_train/2-w": 0.2007349580526352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.5642376020550728, "rewards_train/margins_1": 0.8027784824371338, "rewards_train/margins_2": 0.7223127037286758, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -208.0163116455078, "logps_train/policy_1_l": -227.62399291992188, "logps_train/policy_1_w": -148.8878631591797, "logps_train/policy_2_2": -177.08953857421875, "logps_train/policy_2_w": -204.91102600097656, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.6264936923980713, "rewards_train/1-l": -3.5225555896759033, "rewards_train/1-w": 3.6737139225006104, "rewards_train/2-2": 3.6004226207733154, "rewards_train/2-w": 2.4057722091674805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.196269512176514, "rewards_train/margins_1": 1.047220230102539, "rewards_train/margins_2": 1.194650411605835, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -163.0509033203125, "logps_train/policy_1_l": -238.878662109375, "logps_train/policy_1_w": -92.99139404296875, "logps_train/policy_2_2": -134.30526733398438, "logps_train/policy_2_w": -119.33905029296875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 2.087096691131592, "rewards_train/1-l": -3.587279796600342, "rewards_train/1-w": 2.9340639114379883, "rewards_train/2-2": 3.1479897499084473, "rewards_train/2-w": 2.0145323276519775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.52134370803833, "rewards_train/margins_1": 0.8469672203063965, "rewards_train/margins_2": 1.1334574222564697, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -174.03851318359375, "logps_train/policy_1_l": -129.2679443359375, "logps_train/policy_1_w": -124.86813354492188, "logps_train/policy_2_2": -138.60768127441406, "logps_train/policy_2_w": -169.2548828125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 0.8883365392684937, "rewards_train/1-l": -1.4525753259658813, "rewards_train/1-w": 2.977248430252075, "rewards_train/2-2": 2.438450813293457, "rewards_train/2-w": 1.6268541812896729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.4298237562179565, "rewards_train/margins_1": 2.0889118909835815, "rewards_train/margins_2": 0.8115966320037842, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -93.09807586669922, "logps_train/policy_1_l": -112.83633422851562, "logps_train/policy_1_w": -81.70040893554688, "logps_train/policy_2_2": -76.32770538330078, "logps_train/policy_2_w": -100.16400146484375, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.5370674133300781, "rewards_train/1-l": -1.6245880126953125, "rewards_train/1-w": 2.2502713203430176, "rewards_train/2-2": 1.9836357831954956, "rewards_train/2-w": 1.5539124011993408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.87485933303833, "rewards_train/margins_1": 0.7132039070129395, "rewards_train/margins_2": 0.4297233819961548, "step": 351 }, { "epoch": 1.05, "learning_rate": 2.5e-06, "loss": 0.4756, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -132.95889282226562, "logps_train/policy_1_l": -152.29953002929688, "logps_train/policy_1_w": -105.9976806640625, "logps_train/policy_2_2": -100.51271057128906, "logps_train/policy_2_w": -147.93603515625, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.5955173969268799, "rewards_train/1-l": -2.1998276710510254, "rewards_train/1-w": 3.6455447673797607, "rewards_train/2-2": 2.7971668243408203, "rewards_train/2-w": 2.1563968658447266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.845372438430786, "rewards_train/margins_1": 2.050027370452881, "rewards_train/margins_2": 0.6407699584960938, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -170.79324340820312, "logps_train/policy_1_l": -84.7169418334961, "logps_train/policy_1_w": -102.82926177978516, "logps_train/policy_2_2": -128.0449676513672, "logps_train/policy_2_w": -151.37887573242188, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.439426064491272, "rewards_train/1-l": -0.581850528717041, "rewards_train/1-w": 2.412386417388916, "rewards_train/2-2": 2.510347366333008, "rewards_train/2-w": 0.36758095026016235, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.994236946105957, "rewards_train/margins_1": 1.972960352897644, "rewards_train/margins_2": 2.1427664160728455, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -247.93321228027344, "logps_train/policy_1_l": -252.44424438476562, "logps_train/policy_1_w": -215.4249267578125, "logps_train/policy_2_2": -202.81475830078125, "logps_train/policy_2_w": -254.5452423095703, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 1.9746466875076294, "rewards_train/1-l": -2.7702040672302246, "rewards_train/1-w": 3.3196165561676025, "rewards_train/2-2": 3.758368492126465, "rewards_train/2-w": 1.707194447517395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.089820623397827, "rewards_train/margins_1": 1.3449698686599731, "rewards_train/margins_2": 2.05117404460907, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -107.28685760498047, "logps_train/policy_1_l": -127.75010681152344, "logps_train/policy_1_w": -112.71613311767578, "logps_train/policy_2_2": -82.61300659179688, "logps_train/policy_2_w": -141.8148956298828, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.9181891679763794, "rewards_train/1-l": -1.6965932846069336, "rewards_train/1-w": 3.265105724334717, "rewards_train/2-2": 2.8074493408203125, "rewards_train/2-w": 2.057572841644287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.96169900894165, "rewards_train/margins_1": 1.3469165563583374, "rewards_train/margins_2": 0.7498764991760254, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -159.1702117919922, "logps_train/policy_1_l": -172.4105224609375, "logps_train/policy_1_w": -74.55438995361328, "logps_train/policy_2_2": -133.51596069335938, "logps_train/policy_2_w": -83.88445281982422, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.9228223562240601, "rewards_train/1-l": -2.708240270614624, "rewards_train/1-w": 2.2168264389038086, "rewards_train/2-2": 1.6898099184036255, "rewards_train/2-w": 1.9279611110687256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.925066709518433, "rewards_train/margins_1": 1.2940040826797485, "rewards_train/margins_2": -0.2381511926651001, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -194.5367431640625, "logps_train/policy_1_l": -150.07948303222656, "logps_train/policy_1_w": -142.20028686523438, "logps_train/policy_2_2": -155.62234497070312, "logps_train/policy_2_w": -170.2769012451172, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.627967357635498, "rewards_train/1-l": -1.3649801015853882, "rewards_train/1-w": 3.379971981048584, "rewards_train/2-2": 3.1576876640319824, "rewards_train/2-w": 2.328558921813965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.744952082633972, "rewards_train/margins_1": 1.752004623413086, "rewards_train/margins_2": 0.8291287422180176, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -162.23492431640625, "logps_train/policy_1_l": -138.09814453125, "logps_train/policy_1_w": -79.11199188232422, "logps_train/policy_2_2": -128.2220001220703, "logps_train/policy_2_w": -99.75569152832031, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.9913520812988281, "rewards_train/1-l": -1.8238763809204102, "rewards_train/1-w": 2.669269561767578, "rewards_train/2-2": 2.973112106323242, "rewards_train/2-w": 2.223649740219116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.493145942687988, "rewards_train/margins_1": 0.67791748046875, "rewards_train/margins_2": 0.749462366104126, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -164.72726440429688, "logps_train/policy_1_l": -97.70259094238281, "logps_train/policy_1_w": -90.42693328857422, "logps_train/policy_2_2": -123.29893493652344, "logps_train/policy_2_w": -117.74076080322266, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.042897343635559, "rewards_train/1-l": -1.647359013557434, "rewards_train/1-w": 2.742462635040283, "rewards_train/2-2": 3.093153715133667, "rewards_train/2-w": 1.7259242534637451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.389821648597717, "rewards_train/margins_1": 1.6995652914047241, "rewards_train/margins_2": 1.3672294616699219, "step": 352 }, { "epoch": 1.06, "logps_train/policy_1_2": -112.44551086425781, "logps_train/policy_1_l": -112.09180450439453, "logps_train/policy_1_w": -129.40866088867188, "logps_train/policy_2_2": -75.94673156738281, "logps_train/policy_2_w": -176.15647888183594, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.63201105594635, "rewards_train/1-l": -1.4427741765975952, "rewards_train/1-w": 2.9788594245910645, "rewards_train/2-2": 2.6670455932617188, "rewards_train/2-w": 1.1282973289489746, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.42163360118866, "rewards_train/margins_1": 1.3468483686447144, "rewards_train/margins_2": 1.5387482643127441, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -115.4961929321289, "logps_train/policy_1_l": -122.13333129882812, "logps_train/policy_1_w": -81.52639770507812, "logps_train/policy_2_2": -82.2276611328125, "logps_train/policy_2_w": -119.47183990478516, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.7519426941871643, "rewards_train/1-l": -1.3527740240097046, "rewards_train/1-w": 2.709872245788574, "rewards_train/2-2": 1.936218500137329, "rewards_train/2-w": 1.2239093780517578, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.062646269798279, "rewards_train/margins_1": 1.95792955160141, "rewards_train/margins_2": 0.7123091220855713, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -231.24276733398438, "logps_train/policy_1_l": -137.34445190429688, "logps_train/policy_1_w": -144.42282104492188, "logps_train/policy_2_2": -177.79017639160156, "logps_train/policy_2_w": -203.50350952148438, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.2405668497085571, "rewards_train/1-l": -1.4172576665878296, "rewards_train/1-w": 3.5335004329681396, "rewards_train/2-2": 3.2789902687072754, "rewards_train/2-w": 1.8207427263259888, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.950758099555969, "rewards_train/margins_1": 2.2929335832595825, "rewards_train/margins_2": 1.4582475423812866, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -76.22090148925781, "logps_train/policy_1_l": -82.85810852050781, "logps_train/policy_1_w": -82.92477416992188, "logps_train/policy_2_2": -59.802764892578125, "logps_train/policy_2_w": -108.4117202758789, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.541972279548645, "rewards_train/1-l": -1.0661821365356445, "rewards_train/1-w": 2.3637728691101074, "rewards_train/2-2": 1.8751921653747559, "rewards_train/2-w": 1.4760156869888306, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.429955005645752, "rewards_train/margins_1": 0.8218005895614624, "rewards_train/margins_2": 0.3991764783859253, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -179.97048950195312, "logps_train/policy_1_l": -165.47024536132812, "logps_train/policy_1_w": -72.20306396484375, "logps_train/policy_2_2": -130.41139221191406, "logps_train/policy_2_w": -97.18272399902344, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 0.6357632875442505, "rewards_train/1-l": -2.4711203575134277, "rewards_train/1-w": 2.2605528831481934, "rewards_train/2-2": 2.6057357788085938, "rewards_train/2-w": 1.5590715408325195, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.731673240661621, "rewards_train/margins_1": 1.6247895956039429, "rewards_train/margins_2": 1.0466642379760742, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -138.6048583984375, "logps_train/policy_1_l": -147.22628784179688, "logps_train/policy_1_w": -159.2606201171875, "logps_train/policy_2_2": -106.04277038574219, "logps_train/policy_2_w": -200.4654541015625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 2.1332638263702393, "rewards_train/1-l": -2.0624728202819824, "rewards_train/1-w": 3.9348764419555664, "rewards_train/2-2": 2.913691997528076, "rewards_train/2-w": 2.4050161838531494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.997349262237549, "rewards_train/margins_1": 1.8016126155853271, "rewards_train/margins_2": 0.5086758136749268, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -105.57231140136719, "logps_train/policy_1_l": -93.1500244140625, "logps_train/policy_1_w": -48.88153076171875, "logps_train/policy_2_2": -81.33587646484375, "logps_train/policy_2_w": -65.0951156616211, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -77.5, "rewards_train/1-2": 1.1824181079864502, "rewards_train/1-l": -1.6898069381713867, "rewards_train/1-w": 1.9485657215118408, "rewards_train/2-2": 2.105670213699341, "rewards_train/2-w": 1.2471287250518799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6383726596832275, "rewards_train/margins_1": 0.7661476135253906, "rewards_train/margins_2": 0.8585414886474609, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -138.10360717773438, "logps_train/policy_1_l": -135.10877990722656, "logps_train/policy_1_w": -65.99734497070312, "logps_train/policy_2_2": -95.67237854003906, "logps_train/policy_2_w": -88.06791687011719, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.9443271160125732, "rewards_train/1-l": -2.1321678161621094, "rewards_train/1-w": 1.831125020980835, "rewards_train/2-2": 2.517137050628662, "rewards_train/2-w": 1.072505235671997, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9632928371429443, "rewards_train/margins_1": 0.8867979049682617, "rewards_train/margins_2": 1.444631814956665, "step": 353 }, { "epoch": 1.06, "learning_rate": 2.4753023462963363e-06, "loss": 0.5238, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -136.406982421875, "logps_train/policy_1_l": -158.24435424804688, "logps_train/policy_1_w": -102.67530822753906, "logps_train/policy_2_2": -109.72539520263672, "logps_train/policy_2_w": -133.21200561523438, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.823754906654358, "rewards_train/1-l": -1.4314675331115723, "rewards_train/1-w": 3.138719081878662, "rewards_train/2-2": 2.6118361949920654, "rewards_train/2-w": 1.8998925685882568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.570186614990234, "rewards_train/margins_1": 1.3149641752243042, "rewards_train/margins_2": 0.7119436264038086, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -82.57012939453125, "logps_train/policy_1_l": -76.20097351074219, "logps_train/policy_1_w": -74.85246276855469, "logps_train/policy_2_2": -64.38534545898438, "logps_train/policy_2_w": -112.0202407836914, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.1507995128631592, "rewards_train/1-l": -0.854863166809082, "rewards_train/1-w": 2.2772536277770996, "rewards_train/2-2": 1.4958405494689941, "rewards_train/2-w": 0.7925070524215698, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1321167945861816, "rewards_train/margins_1": 1.1264541149139404, "rewards_train/margins_2": 0.7033334970474243, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -227.59490966796875, "logps_train/policy_1_l": -193.80374145507812, "logps_train/policy_1_w": -182.90463256835938, "logps_train/policy_2_2": -197.09828186035156, "logps_train/policy_2_w": -211.83233642578125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.8342583179473877, "rewards_train/1-l": -2.1991236209869385, "rewards_train/1-w": 4.290785789489746, "rewards_train/2-2": 3.0526723861694336, "rewards_train/2-w": 3.3667659759521484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.489909410476685, "rewards_train/margins_1": 2.4565274715423584, "rewards_train/margins_2": -0.31409358978271484, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -112.39678955078125, "logps_train/policy_1_l": -215.09234619140625, "logps_train/policy_1_w": -134.07003784179688, "logps_train/policy_2_2": -90.92635345458984, "logps_train/policy_2_w": -154.34205627441406, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.0532898902893066, "rewards_train/1-l": -2.5104079246520996, "rewards_train/1-w": 2.9765894412994385, "rewards_train/2-2": 2.8409581184387207, "rewards_train/2-w": 2.3173575401306152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.486997365951538, "rewards_train/margins_1": 0.9232995510101318, "rewards_train/margins_2": 0.5236005783081055, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -101.98886108398438, "logps_train/policy_1_l": -108.73463439941406, "logps_train/policy_1_w": -99.06887817382812, "logps_train/policy_2_2": -74.69779968261719, "logps_train/policy_2_w": -156.04266357421875, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.4233789443969727, "rewards_train/1-l": -1.4793223142623901, "rewards_train/1-w": 2.646237373352051, "rewards_train/2-2": 2.3497514724731445, "rewards_train/2-w": 0.5824536681175232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.125559687614441, "rewards_train/margins_1": 1.2228584289550781, "rewards_train/margins_2": 1.7672978043556213, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -169.2342987060547, "logps_train/policy_1_l": -215.00750732421875, "logps_train/policy_1_w": -147.01995849609375, "logps_train/policy_2_2": -122.11892700195312, "logps_train/policy_2_w": -188.44046020507812, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.9757887125015259, "rewards_train/1-l": -3.071063995361328, "rewards_train/1-w": 3.2855029106140137, "rewards_train/2-2": 3.4920129776000977, "rewards_train/2-w": 1.5961886644363403, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.356566905975342, "rewards_train/margins_1": 1.3097141981124878, "rewards_train/margins_2": 1.8958243131637573, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -66.57864379882812, "logps_train/policy_1_l": -87.18470764160156, "logps_train/policy_1_w": -52.755523681640625, "logps_train/policy_2_2": -48.5665397644043, "logps_train/policy_2_w": -80.16897583007812, "logps_train/ref_1_2": -75.5, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -63.0, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 0.882370114326477, "rewards_train/1-l": -1.2707176208496094, "rewards_train/1-w": 2.1901702880859375, "rewards_train/2-2": 1.4206899404525757, "rewards_train/2-w": 1.3647551536560059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.460887908935547, "rewards_train/margins_1": 1.3078001737594604, "rewards_train/margins_2": 0.055934786796569824, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -155.73867797851562, "logps_train/policy_1_l": -151.19097900390625, "logps_train/policy_1_w": -134.16896057128906, "logps_train/policy_2_2": -123.66648864746094, "logps_train/policy_2_w": -178.64312744140625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.418320655822754, "rewards_train/1-l": -1.9558165073394775, "rewards_train/1-w": 3.1206040382385254, "rewards_train/2-2": 2.595850706100464, "rewards_train/2-w": 1.6825627088546753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.076420545578003, "rewards_train/margins_1": 1.7022833824157715, "rewards_train/margins_2": 0.9132879972457886, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -141.98831176757812, "logps_train/policy_1_l": -124.95064544677734, "logps_train/policy_1_w": -97.51396179199219, "logps_train/policy_2_2": -112.25643920898438, "logps_train/policy_2_w": -123.49552154541016, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 2.0269503593444824, "rewards_train/1-l": -1.9601036310195923, "rewards_train/1-w": 2.863448143005371, "rewards_train/2-2": 3.303457736968994, "rewards_train/2-w": 1.6621673107147217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.823551774024963, "rewards_train/margins_1": 0.8364977836608887, "rewards_train/margins_2": 1.6412904262542725, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -219.7779541015625, "logps_train/policy_1_l": -245.29933166503906, "logps_train/policy_1_w": -151.4705810546875, "logps_train/policy_2_2": -169.77786254882812, "logps_train/policy_2_w": -205.27615356445312, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.4753293991088867, "rewards_train/1-l": -3.108058452606201, "rewards_train/1-w": 3.977550506591797, "rewards_train/2-2": 4.140963077545166, "rewards_train/2-w": 2.2188687324523926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.085608959197998, "rewards_train/margins_1": 1.5022211074829102, "rewards_train/margins_2": 1.9220943450927734, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -151.78648376464844, "logps_train/policy_1_l": -164.95062255859375, "logps_train/policy_1_w": -158.17425537109375, "logps_train/policy_2_2": -115.8724594116211, "logps_train/policy_2_w": -211.32325744628906, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.4443986415863037, "rewards_train/1-l": -1.5587341785430908, "rewards_train/1-w": 3.263823986053467, "rewards_train/2-2": 2.6477153301239014, "rewards_train/2-w": 0.9957993030548096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.822558164596558, "rewards_train/margins_1": 1.819425344467163, "rewards_train/margins_2": 1.6519160270690918, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -76.05615234375, "logps_train/policy_1_l": -70.09069061279297, "logps_train/policy_1_w": -24.449134826660156, "logps_train/policy_2_2": -57.23237228393555, "logps_train/policy_2_w": -34.661354064941406, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -39.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -44.25, "rewards_train/1-2": 1.239013910293579, "rewards_train/1-l": -1.327330470085144, "rewards_train/1-w": 1.494149088859558, "rewards_train/2-2": 1.7940479516983032, "rewards_train/2-w": 0.9717551469802856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.821479558944702, "rewards_train/margins_1": 0.255135178565979, "rewards_train/margins_2": 0.8222928047180176, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -142.65029907226562, "logps_train/policy_1_l": -105.50595092773438, "logps_train/policy_1_w": -104.90592956542969, "logps_train/policy_2_2": -107.80003356933594, "logps_train/policy_2_w": -136.54339599609375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.852157711982727, "rewards_train/1-l": -1.336923599243164, "rewards_train/1-w": 2.756281852722168, "rewards_train/2-2": 3.0426523685455322, "rewards_train/2-w": 1.3784725666046143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.093205451965332, "rewards_train/margins_1": 0.9041241407394409, "rewards_train/margins_2": 1.664179801940918, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -127.6984634399414, "logps_train/policy_1_l": -79.22413635253906, "logps_train/policy_1_w": -69.24935913085938, "logps_train/policy_2_2": -103.45040130615234, "logps_train/policy_2_w": -84.467041015625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 1.4856226444244385, "rewards_train/1-l": -1.147242784500122, "rewards_train/1-w": 2.125063419342041, "rewards_train/2-2": 2.4369912147521973, "rewards_train/2-w": 1.3988041877746582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.272306203842163, "rewards_train/margins_1": 0.6394407749176025, "rewards_train/margins_2": 1.038187026977539, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -166.5294189453125, "logps_train/policy_1_l": -188.89944458007812, "logps_train/policy_1_w": -150.2386474609375, "logps_train/policy_2_2": -123.47273254394531, "logps_train/policy_2_w": -210.08718872070312, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.1048710346221924, "rewards_train/1-l": -3.4378929138183594, "rewards_train/1-w": 2.9558234214782715, "rewards_train/2-2": 3.579288959503174, "rewards_train/2-w": 0.6662813425064087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.393716335296631, "rewards_train/margins_1": 0.8509523868560791, "rewards_train/margins_2": 2.913007616996765, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -122.5710678100586, "logps_train/policy_1_l": -194.04745483398438, "logps_train/policy_1_w": -72.19422149658203, "logps_train/policy_2_2": -99.77870178222656, "logps_train/policy_2_w": -96.51083374023438, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 2.0132060050964355, "rewards_train/1-l": -3.128183126449585, "rewards_train/1-w": 2.9368276596069336, "rewards_train/2-2": 2.717442512512207, "rewards_train/2-w": 2.427041530609131, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.0650107860565186, "rewards_train/margins_1": 0.923621654510498, "rewards_train/margins_2": 0.29040098190307617, "step": 355 }, { "epoch": 1.07, "learning_rate": 2.4506071030401345e-06, "loss": 0.5644, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -230.89056396484375, "logps_train/policy_1_l": -175.7742156982422, "logps_train/policy_1_w": -138.70359802246094, "logps_train/policy_2_2": -186.1988983154297, "logps_train/policy_2_w": -180.47933959960938, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 2.1546926498413086, "rewards_train/1-l": -1.7360161542892456, "rewards_train/1-w": 3.379639148712158, "rewards_train/2-2": 4.101985931396484, "rewards_train/2-w": 2.014566421508789, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.115655303001404, "rewards_train/margins_1": 1.2249464988708496, "rewards_train/margins_2": 2.0874195098876953, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -181.08456420898438, "logps_train/policy_1_l": -143.6671142578125, "logps_train/policy_1_w": -124.70781707763672, "logps_train/policy_2_2": -132.47103881835938, "logps_train/policy_2_w": -171.06768798828125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5852947235107422, "rewards_train/1-l": -1.4825305938720703, "rewards_train/1-w": 3.4635932445526123, "rewards_train/2-2": 3.5060200691223145, "rewards_train/2-w": 2.071356773376465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.946123838424683, "rewards_train/margins_1": 1.8782985210418701, "rewards_train/margins_2": 1.4346632957458496, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -212.5548553466797, "logps_train/policy_1_l": -153.86077880859375, "logps_train/policy_1_w": -110.99172973632812, "logps_train/policy_2_2": -172.171875, "logps_train/policy_2_w": -147.802001953125, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.5710784196853638, "rewards_train/1-l": -1.5634222030639648, "rewards_train/1-w": 2.9461398124694824, "rewards_train/2-2": 3.107813835144043, "rewards_train/2-w": 1.6924569606781006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.509562015533447, "rewards_train/margins_1": 1.3750613927841187, "rewards_train/margins_2": 1.4153568744659424, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -91.52136993408203, "logps_train/policy_1_l": -76.57421112060547, "logps_train/policy_1_w": -51.89885711669922, "logps_train/policy_2_2": -74.4353256225586, "logps_train/policy_2_w": -62.19136047363281, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -65.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -72.0, "rewards_train/1-2": 1.2099721431732178, "rewards_train/1-l": -1.1167958974838257, "rewards_train/1-w": 1.314801812171936, "rewards_train/2-2": 1.8738502264022827, "rewards_train/2-w": 0.9688519239425659, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.4315977096557617, "rewards_train/margins_1": 0.10482966899871826, "rewards_train/margins_2": 0.9049983024597168, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -138.8537139892578, "logps_train/policy_1_l": -96.69078063964844, "logps_train/policy_1_w": -61.003238677978516, "logps_train/policy_2_2": -104.43342590332031, "logps_train/policy_2_w": -88.13533020019531, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 1.234940767288208, "rewards_train/1-l": -1.378843069076538, "rewards_train/1-w": 1.8840512037277222, "rewards_train/2-2": 2.696110248565674, "rewards_train/2-w": 1.0458422899246216, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2628942728042603, "rewards_train/margins_1": 0.6491104364395142, "rewards_train/margins_2": 1.6502679586410522, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -115.98385620117188, "logps_train/policy_1_l": -136.27230834960938, "logps_train/policy_1_w": -92.46139526367188, "logps_train/policy_2_2": -91.81098937988281, "logps_train/policy_2_w": -109.6697006225586, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 2.0203638076782227, "rewards_train/1-l": -1.960483193397522, "rewards_train/1-w": 1.8987334966659546, "rewards_train/2-2": 2.7368690967559814, "rewards_train/2-w": 1.3634986877441406, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8592166900634766, "rewards_train/margins_1": -0.12163031101226807, "rewards_train/margins_2": 1.3733704090118408, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -158.58169555664062, "logps_train/policy_1_l": -114.94837951660156, "logps_train/policy_1_w": -100.78402709960938, "logps_train/policy_2_2": -131.10324096679688, "logps_train/policy_2_w": -129.05836486816406, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2762060165405273, "rewards_train/1-l": -1.7124156951904297, "rewards_train/1-w": 3.0251128673553467, "rewards_train/2-2": 2.7787389755249023, "rewards_train/2-w": 1.9175043106079102, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.737528562545776, "rewards_train/margins_1": 1.7489068508148193, "rewards_train/margins_2": 0.8612346649169922, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -176.3757781982422, "logps_train/policy_1_l": -174.7213897705078, "logps_train/policy_1_w": -223.89561462402344, "logps_train/policy_2_2": -145.0338897705078, "logps_train/policy_2_w": -255.35353088378906, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.9542193412780762, "rewards_train/1-l": -1.812764048576355, "rewards_train/1-w": 4.097938060760498, "rewards_train/2-2": 3.000126600265503, "rewards_train/2-w": 2.3568360805511475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.910702109336853, "rewards_train/margins_1": 2.143718719482422, "rewards_train/margins_2": 0.6432905197143555, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -221.35604858398438, "logps_train/policy_1_l": -131.9737548828125, "logps_train/policy_1_w": -120.30699920654297, "logps_train/policy_2_2": -176.2460479736328, "logps_train/policy_2_w": -151.84439086914062, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.808144450187683, "rewards_train/1-l": -1.0762805938720703, "rewards_train/1-w": 2.641955852508545, "rewards_train/2-2": 4.063676834106445, "rewards_train/2-w": 1.4249351024627686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7182364463806152, "rewards_train/margins_1": 0.8338114023208618, "rewards_train/margins_2": 2.6387417316436768, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -165.5956268310547, "logps_train/policy_1_l": -245.3638916015625, "logps_train/policy_1_w": -185.1357421875, "logps_train/policy_2_2": -138.0462646484375, "logps_train/policy_2_w": -225.74012756347656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.4427809715270996, "rewards_train/1-l": -2.943859577178955, "rewards_train/1-w": 3.8223636150360107, "rewards_train/2-2": 3.3328726291656494, "rewards_train/2-w": 2.2642688751220703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.766223192214966, "rewards_train/margins_1": 1.3795826435089111, "rewards_train/margins_2": 1.068603754043579, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -218.25796508789062, "logps_train/policy_1_l": -126.823974609375, "logps_train/policy_1_w": -155.77093505859375, "logps_train/policy_2_2": -172.9453125, "logps_train/policy_2_w": -199.98410034179688, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.1148290634155273, "rewards_train/1-l": -1.187084674835205, "rewards_train/1-w": 3.2018141746520996, "rewards_train/2-2": 3.6960935592651367, "rewards_train/2-w": 1.371901273727417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.388898849487305, "rewards_train/margins_1": 1.0869851112365723, "rewards_train/margins_2": 2.3241922855377197, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -121.7813949584961, "logps_train/policy_1_l": -161.69972229003906, "logps_train/policy_1_w": -103.15312194824219, "logps_train/policy_2_2": -97.27157592773438, "logps_train/policy_2_w": -127.97947692871094, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.073423385620117, "rewards_train/1-l": -2.098097324371338, "rewards_train/1-w": 2.7819535732269287, "rewards_train/2-2": 2.5775301456451416, "rewards_train/2-w": 2.049708127975464, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.880050897598267, "rewards_train/margins_1": 0.7085301876068115, "rewards_train/margins_2": 0.5278220176696777, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -115.08549499511719, "logps_train/policy_1_l": -137.3480224609375, "logps_train/policy_1_w": -135.58987426757812, "logps_train/policy_2_2": -87.82411193847656, "logps_train/policy_2_w": -189.80889892578125, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8164501190185547, "rewards_train/1-l": -1.5014041662216187, "rewards_train/1-w": 3.8253889083862305, "rewards_train/2-2": 2.8722763061523438, "rewards_train/2-w": 1.7753610610961914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.326793074607849, "rewards_train/margins_1": 2.008938789367676, "rewards_train/margins_2": 1.0969152450561523, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -172.69882202148438, "logps_train/policy_1_l": -202.31793212890625, "logps_train/policy_1_w": -103.28168487548828, "logps_train/policy_2_2": -134.9501495361328, "logps_train/policy_2_w": -147.4326629638672, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.5496482849121094, "rewards_train/1-l": -2.519683361053467, "rewards_train/1-w": 3.4538626670837402, "rewards_train/2-2": 3.005767345428467, "rewards_train/2-w": 2.301265239715576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.973546028137207, "rewards_train/margins_1": 1.9042143821716309, "rewards_train/margins_2": 0.7045021057128906, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -249.17230224609375, "logps_train/policy_1_l": -200.98394775390625, "logps_train/policy_1_w": -176.76820373535156, "logps_train/policy_2_2": -200.29783630371094, "logps_train/policy_2_w": -219.11599731445312, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.151519536972046, "rewards_train/1-l": -2.242145299911499, "rewards_train/1-w": 4.338804721832275, "rewards_train/2-2": 3.804591655731201, "rewards_train/2-w": 2.7821507453918457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.580950021743774, "rewards_train/margins_1": 2.1872851848602295, "rewards_train/margins_2": 1.0224409103393555, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -179.16793823242188, "logps_train/policy_1_l": -253.71163940429688, "logps_train/policy_1_w": -142.43235778808594, "logps_train/policy_2_2": -151.6495361328125, "logps_train/policy_2_w": -178.69921875, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 2.7332069873809814, "rewards_train/1-l": -2.504757881164551, "rewards_train/1-w": 4.427077293395996, "rewards_train/2-2": 3.9537973403930664, "rewards_train/2-w": 3.0269534587860107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.931835174560547, "rewards_train/margins_1": 1.6938703060150146, "rewards_train/margins_2": 0.9268438816070557, "step": 357 }, { "epoch": 1.07, "learning_rate": 2.4259166804436008e-06, "loss": 0.4335, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -137.31402587890625, "logps_train/policy_1_l": -155.6031036376953, "logps_train/policy_1_w": -151.62713623046875, "logps_train/policy_2_2": -115.15816497802734, "logps_train/policy_2_w": -193.37896728515625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 2.21586275100708, "rewards_train/1-l": -1.4774986505508423, "rewards_train/1-w": 3.2638497352600098, "rewards_train/2-2": 3.2095742225646973, "rewards_train/2-w": 1.1769479513168335, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.741348385810852, "rewards_train/margins_1": 1.0479869842529297, "rewards_train/margins_2": 2.0326262712478638, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -162.4967498779297, "logps_train/policy_1_l": -135.7966766357422, "logps_train/policy_1_w": -128.7066650390625, "logps_train/policy_2_2": -121.25714111328125, "logps_train/policy_2_w": -175.71507263183594, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.444465160369873, "rewards_train/1-l": -1.9055466651916504, "rewards_train/1-w": 3.569960117340088, "rewards_train/2-2": 2.8107118606567383, "rewards_train/2-w": 2.287868022918701, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.475506782531738, "rewards_train/margins_1": 2.125494956970215, "rewards_train/margins_2": 0.5228438377380371, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -167.98431396484375, "logps_train/policy_1_l": -158.3106689453125, "logps_train/policy_1_w": -148.91465759277344, "logps_train/policy_2_2": -136.24600219726562, "logps_train/policy_2_w": -194.13720703125, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.2859444618225098, "rewards_train/1-l": -1.6216917037963867, "rewards_train/1-w": 3.752284049987793, "rewards_train/2-2": 3.412900447845459, "rewards_train/2-w": 2.209717273712158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.37397575378418, "rewards_train/margins_1": 1.4663395881652832, "rewards_train/margins_2": 1.2031831741333008, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -159.49642944335938, "logps_train/policy_1_l": -143.02096557617188, "logps_train/policy_1_w": -154.180419921875, "logps_train/policy_2_2": -122.6387710571289, "logps_train/policy_2_w": -204.4795379638672, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.5737944841384888, "rewards_train/1-l": -1.5436981916427612, "rewards_train/1-w": 5.0350847244262695, "rewards_train/2-2": 3.125185966491699, "rewards_train/2-w": 2.8348593711853027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.578782916069031, "rewards_train/margins_1": 3.4612902402877808, "rewards_train/margins_2": 0.2903265953063965, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -138.68923950195312, "logps_train/policy_1_l": -165.64796447753906, "logps_train/policy_1_w": -140.78176879882812, "logps_train/policy_2_2": -109.14239501953125, "logps_train/policy_2_w": -175.27194213867188, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.6541227102279663, "rewards_train/1-l": -2.0441908836364746, "rewards_train/1-w": 3.2454566955566406, "rewards_train/2-2": 3.2302916049957275, "rewards_train/2-w": 1.8277873992919922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.289647579193115, "rewards_train/margins_1": 1.5913339853286743, "rewards_train/margins_2": 1.4025042057037354, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -128.91787719726562, "logps_train/policy_1_l": -216.9800567626953, "logps_train/policy_1_w": -83.03184509277344, "logps_train/policy_2_2": -94.66609954833984, "logps_train/policy_2_w": -118.09527587890625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.3207132816314697, "rewards_train/1-l": -2.576765775680542, "rewards_train/1-w": 1.9138070344924927, "rewards_train/2-2": 2.5619056224823, "rewards_train/2-w": 1.10453462600708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.490572810173035, "rewards_train/margins_1": 0.593093752861023, "rewards_train/margins_2": 1.4573709964752197, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -93.96839904785156, "logps_train/policy_1_l": -101.67832946777344, "logps_train/policy_1_w": -69.2291030883789, "logps_train/policy_2_2": -76.26947021484375, "logps_train/policy_2_w": -95.89315795898438, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 1.3172227144241333, "rewards_train/1-l": -1.5106064081192017, "rewards_train/1-w": 2.2979884147644043, "rewards_train/2-2": 1.7519590854644775, "rewards_train/2-w": 1.6161528825759888, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.808594822883606, "rewards_train/margins_1": 0.980765700340271, "rewards_train/margins_2": 0.13580620288848877, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -228.36727905273438, "logps_train/policy_1_l": -234.60162353515625, "logps_train/policy_1_w": -155.12940979003906, "logps_train/policy_2_2": -183.5141143798828, "logps_train/policy_2_w": -192.01351928710938, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.9585846662521362, "rewards_train/1-l": -3.553912401199341, "rewards_train/1-w": 3.502683639526367, "rewards_train/2-2": 3.642338275909424, "rewards_train/2-w": 2.389272451400757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.056596040725708, "rewards_train/margins_1": 1.544098973274231, "rewards_train/margins_2": 1.253065824508667, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -142.062255859375, "logps_train/policy_1_l": -132.8118896484375, "logps_train/policy_1_w": -133.77499389648438, "logps_train/policy_2_2": -118.58145141601562, "logps_train/policy_2_w": -160.348388671875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.4047117233276367, "rewards_train/1-l": -1.4333372116088867, "rewards_train/1-w": 3.531874895095825, "rewards_train/2-2": 3.3649020195007324, "rewards_train/2-w": 2.733912467956543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.965212106704712, "rewards_train/margins_1": 1.1271631717681885, "rewards_train/margins_2": 0.6309895515441895, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -191.67974853515625, "logps_train/policy_1_l": -226.6398162841797, "logps_train/policy_1_w": -148.18565368652344, "logps_train/policy_2_2": -155.88137817382812, "logps_train/policy_2_w": -193.28656005859375, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.675774097442627, "rewards_train/1-l": -2.1507017612457275, "rewards_train/1-w": 3.875575542449951, "rewards_train/2-2": 3.7743618488311768, "rewards_train/2-w": 2.3760299682617188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.026277303695679, "rewards_train/margins_1": 1.1998014450073242, "rewards_train/margins_2": 1.398331880569458, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -158.5031280517578, "logps_train/policy_1_l": -98.39958953857422, "logps_train/policy_1_w": -101.40267944335938, "logps_train/policy_2_2": -131.7984619140625, "logps_train/policy_2_w": -141.20994567871094, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.419999361038208, "rewards_train/1-l": -1.1409351825714111, "rewards_train/1-w": 2.277310609817505, "rewards_train/2-2": 2.4420294761657715, "rewards_train/2-w": 1.0348646640777588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.418245792388916, "rewards_train/margins_1": 0.8573112487792969, "rewards_train/margins_2": 1.4071648120880127, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -105.91497039794922, "logps_train/policy_1_l": -185.21983337402344, "logps_train/policy_1_w": -156.5709686279297, "logps_train/policy_2_2": -86.77757263183594, "logps_train/policy_2_w": -174.70367431640625, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.3428778648376465, "rewards_train/1-l": -2.712413787841797, "rewards_train/1-w": 2.211653470993042, "rewards_train/2-2": 3.089430570602417, "rewards_train/2-w": 1.3858827352523804, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.924067258834839, "rewards_train/margins_1": -0.1312243938446045, "rewards_train/margins_2": 1.7035478353500366, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -108.12005615234375, "logps_train/policy_1_l": -115.0862808227539, "logps_train/policy_1_w": -88.29048919677734, "logps_train/policy_2_2": -79.04779815673828, "logps_train/policy_2_w": -129.8477325439453, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.9006903171539307, "rewards_train/1-l": -0.9906109571456909, "rewards_train/1-w": 2.993558645248413, "rewards_train/2-2": 1.8343801498413086, "rewards_train/2-w": 1.6371020078659058, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.984169602394104, "rewards_train/margins_1": 2.0928683280944824, "rewards_train/margins_2": 0.19727814197540283, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -191.212890625, "logps_train/policy_1_l": -165.6254425048828, "logps_train/policy_1_w": -119.72293090820312, "logps_train/policy_2_2": -166.27780151367188, "logps_train/policy_2_w": -142.57350158691406, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.591991901397705, "rewards_train/1-l": -1.6054649353027344, "rewards_train/1-w": 2.9300503730773926, "rewards_train/2-2": 2.578078269958496, "rewards_train/2-w": 1.9735090732574463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.535515308380127, "rewards_train/margins_1": 1.3380584716796875, "rewards_train/margins_2": 0.6045691967010498, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -259.82470703125, "logps_train/policy_1_l": -163.98573303222656, "logps_train/policy_1_w": -139.79336547851562, "logps_train/policy_2_2": -189.35955810546875, "logps_train/policy_2_w": -188.20724487304688, "logps_train/ref_1_2": -278.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7675294876098633, "rewards_train/1-l": -1.0566785335540771, "rewards_train/1-w": 3.149275302886963, "rewards_train/2-2": 4.429667949676514, "rewards_train/2-w": 1.544901967048645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.20595383644104, "rewards_train/margins_1": 1.3817458152770996, "rewards_train/margins_2": 2.8847659826278687, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -221.82095336914062, "logps_train/policy_1_l": -169.20367431640625, "logps_train/policy_1_w": -109.80711364746094, "logps_train/policy_2_2": -157.770263671875, "logps_train/policy_2_w": -168.07899475097656, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.0522799491882324, "rewards_train/1-l": -1.945088505744934, "rewards_train/1-w": 3.037536144256592, "rewards_train/2-2": 3.833911180496216, "rewards_train/2-w": 1.3899153470993042, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.982624650001526, "rewards_train/margins_1": 1.9852561950683594, "rewards_train/margins_2": 2.4439958333969116, "step": 359 }, { "epoch": 1.08, "learning_rate": 2.4012334882484554e-06, "loss": 0.6045, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -154.84060668945312, "logps_train/policy_1_l": -142.4574432373047, "logps_train/policy_1_w": -94.03746032714844, "logps_train/policy_2_2": -118.71949005126953, "logps_train/policy_2_w": -126.4984130859375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.4948458671569824, "rewards_train/1-l": -1.5211353302001953, "rewards_train/1-w": 3.2962541580200195, "rewards_train/2-2": 2.6503162384033203, "rewards_train/2-w": 1.91070556640625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.817389488220215, "rewards_train/margins_1": 1.801408290863037, "rewards_train/margins_2": 0.7396106719970703, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -181.15121459960938, "logps_train/policy_1_l": -208.74307250976562, "logps_train/policy_1_w": -153.62957763671875, "logps_train/policy_2_2": -157.3096160888672, "logps_train/policy_2_w": -181.36431884765625, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 3.1005043983459473, "rewards_train/1-l": -2.485243320465088, "rewards_train/1-w": 4.252666473388672, "rewards_train/2-2": 3.800288677215576, "rewards_train/2-w": 3.1776304244995117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.73790979385376, "rewards_train/margins_1": 1.1521620750427246, "rewards_train/margins_2": 0.6226582527160645, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -152.34048461914062, "logps_train/policy_1_l": -161.09791564941406, "logps_train/policy_1_w": -120.90492248535156, "logps_train/policy_2_2": -133.4315185546875, "logps_train/policy_2_w": -141.49142456054688, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.705796003341675, "rewards_train/1-l": -1.704712986946106, "rewards_train/1-w": 3.978257656097412, "rewards_train/2-2": 3.2826285362243652, "rewards_train/2-w": 3.1274189949035645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.682970643043518, "rewards_train/margins_1": 1.2724616527557373, "rewards_train/margins_2": 0.15520954132080078, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -104.28666687011719, "logps_train/policy_1_l": -142.48362731933594, "logps_train/policy_1_w": -113.01235961914062, "logps_train/policy_2_2": -83.72492980957031, "logps_train/policy_2_w": -145.15101623535156, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 2.0494582653045654, "rewards_train/1-l": -2.145871877670288, "rewards_train/1-w": 2.4508635997772217, "rewards_train/2-2": 2.4103200435638428, "rewards_train/2-w": 1.3345927000045776, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.59673547744751, "rewards_train/margins_1": 0.40140533447265625, "rewards_train/margins_2": 1.0757273435592651, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -179.81771850585938, "logps_train/policy_1_l": -186.54750061035156, "logps_train/policy_1_w": -164.7028045654297, "logps_train/policy_2_2": -138.10263061523438, "logps_train/policy_2_w": -213.7551727294922, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 2.4932289123535156, "rewards_train/1-l": -1.4102193117141724, "rewards_train/1-w": 3.979719638824463, "rewards_train/2-2": 3.995988368988037, "rewards_train/2-w": 2.8369836807250977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.389938950538635, "rewards_train/margins_1": 1.4864907264709473, "rewards_train/margins_2": 1.1590046882629395, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -209.71563720703125, "logps_train/policy_1_l": -199.80230712890625, "logps_train/policy_1_w": -121.09535217285156, "logps_train/policy_2_2": -160.39309692382812, "logps_train/policy_2_w": -156.83697509765625, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 2.415935516357422, "rewards_train/1-l": -2.0364811420440674, "rewards_train/1-w": 3.916637420654297, "rewards_train/2-2": 4.516940116882324, "rewards_train/2-w": 2.45106840133667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.953118562698364, "rewards_train/margins_1": 1.500701904296875, "rewards_train/margins_2": 2.0658717155456543, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -193.83514404296875, "logps_train/policy_1_l": -270.7942199707031, "logps_train/policy_1_w": -158.65330505371094, "logps_train/policy_2_2": -152.63934326171875, "logps_train/policy_2_w": -200.6552276611328, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.091484546661377, "rewards_train/1-l": -3.010671377182007, "rewards_train/1-w": 3.8377950191497803, "rewards_train/2-2": 3.9110660552978516, "rewards_train/2-w": 2.159478187561035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.848466396331787, "rewards_train/margins_1": 1.7463104724884033, "rewards_train/margins_2": 1.7515878677368164, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -212.67459106445312, "logps_train/policy_1_l": -203.2777099609375, "logps_train/policy_1_w": -174.79330444335938, "logps_train/policy_2_2": -177.01498413085938, "logps_train/policy_2_w": -219.75112915039062, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 2.345041513442993, "rewards_train/1-l": -1.3014031648635864, "rewards_train/1-w": 4.997231960296631, "rewards_train/2-2": 3.870378017425537, "rewards_train/2-w": 3.110823631286621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.298635125160217, "rewards_train/margins_1": 2.6521904468536377, "rewards_train/margins_2": 0.759554386138916, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -99.87644958496094, "logps_train/policy_1_l": -112.47151184082031, "logps_train/policy_1_w": -64.58651733398438, "logps_train/policy_2_2": -79.55364227294922, "logps_train/policy_2_w": -82.10948181152344, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 0.9760273694992065, "rewards_train/1-l": -1.6053550243377686, "rewards_train/1-w": 2.383535623550415, "rewards_train/2-2": 1.884869933128357, "rewards_train/2-w": 1.8031147718429565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9888906478881836, "rewards_train/margins_1": 1.4075082540512085, "rewards_train/margins_2": 0.08175516128540039, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -215.85043334960938, "logps_train/policy_1_l": -185.5587158203125, "logps_train/policy_1_w": -136.54751586914062, "logps_train/policy_2_2": -168.950439453125, "logps_train/policy_2_w": -185.8048553466797, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.3274564743041992, "rewards_train/1-l": -1.7332159280776978, "rewards_train/1-w": 3.294077157974243, "rewards_train/2-2": 2.981518268585205, "rewards_train/2-w": 1.5992019176483154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.027293086051941, "rewards_train/margins_1": 1.966620683670044, "rewards_train/margins_2": 1.3823163509368896, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -182.76654052734375, "logps_train/policy_1_l": -138.57778930664062, "logps_train/policy_1_w": -134.8639373779297, "logps_train/policy_2_2": -144.87124633789062, "logps_train/policy_2_w": -179.69651794433594, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.873345136642456, "rewards_train/1-l": -1.706215500831604, "rewards_train/1-w": 4.3448567390441895, "rewards_train/2-2": 3.181624412536621, "rewards_train/2-w": 2.3428492546081543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.0510722398757935, "rewards_train/margins_1": 2.4715116024017334, "rewards_train/margins_2": 0.8387751579284668, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -231.5821533203125, "logps_train/policy_1_l": -154.14132690429688, "logps_train/policy_1_w": -133.8914337158203, "logps_train/policy_2_2": -184.10162353515625, "logps_train/policy_2_w": -191.70953369140625, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 0.7588747143745422, "rewards_train/1-l": -1.068624496459961, "rewards_train/1-w": 4.426383972167969, "rewards_train/2-2": 2.796283006668091, "rewards_train/2-w": 2.261272668838501, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.49500846862793, "rewards_train/margins_1": 3.6675092577934265, "rewards_train/margins_2": 0.5350103378295898, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -146.22769165039062, "logps_train/policy_1_l": -232.9548797607422, "logps_train/policy_1_w": -140.43109130859375, "logps_train/policy_2_2": -111.56161499023438, "logps_train/policy_2_w": -182.79258728027344, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.2994487285614014, "rewards_train/1-l": -2.344705820083618, "rewards_train/1-w": 3.7928290367126465, "rewards_train/2-2": 2.1466712951660156, "rewards_train/2-w": 2.0988659858703613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.137534856796265, "rewards_train/margins_1": 2.493380308151245, "rewards_train/margins_2": 0.0478053092956543, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -188.69589233398438, "logps_train/policy_1_l": -143.96722412109375, "logps_train/policy_1_w": -106.13365173339844, "logps_train/policy_2_2": -153.21554565429688, "logps_train/policy_2_w": -134.3074951171875, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 2.8233799934387207, "rewards_train/1-l": -1.74281644821167, "rewards_train/1-w": 3.681947946548462, "rewards_train/2-2": 4.4667277336120605, "rewards_train/2-w": 2.6833128929138184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.424764394760132, "rewards_train/margins_1": 0.8585679531097412, "rewards_train/margins_2": 1.7834148406982422, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -190.0333709716797, "logps_train/policy_1_l": -158.49542236328125, "logps_train/policy_1_w": -152.23736572265625, "logps_train/policy_2_2": -150.8839111328125, "logps_train/policy_2_w": -188.86766052246094, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.9060373306274414, "rewards_train/1-l": -1.6042299270629883, "rewards_train/1-w": 2.7465758323669434, "rewards_train/2-2": 3.500671148300171, "rewards_train/2-w": 1.4226093292236328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.350805759429932, "rewards_train/margins_1": 0.840538501739502, "rewards_train/margins_2": 2.078061819076538, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -241.49363708496094, "logps_train/policy_1_l": -191.71177673339844, "logps_train/policy_1_w": -162.534423828125, "logps_train/policy_2_2": -178.2906036376953, "logps_train/policy_2_w": -227.58016967773438, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.225637674331665, "rewards_train/1-l": -2.2034037113189697, "rewards_train/1-w": 3.8778085708618164, "rewards_train/2-2": 4.974064826965332, "rewards_train/2-w": 1.5826083421707153, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.081212282180786, "rewards_train/margins_1": 1.6521708965301514, "rewards_train/margins_2": 3.3914564847946167, "step": 361 }, { "epoch": 1.08, "learning_rate": 2.376559935490743e-06, "loss": 0.4077, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -119.34449768066406, "logps_train/policy_1_l": -112.3149185180664, "logps_train/policy_1_w": -80.05398559570312, "logps_train/policy_2_2": -94.75680541992188, "logps_train/policy_2_w": -111.24705505371094, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 1.4833242893218994, "rewards_train/1-l": -1.448972463607788, "rewards_train/1-w": 2.3673059940338135, "rewards_train/2-2": 2.1813507080078125, "rewards_train/2-w": 1.5465842485427856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8162784576416016, "rewards_train/margins_1": 0.8839817047119141, "rewards_train/margins_2": 0.6347664594650269, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -100.4167251586914, "logps_train/policy_1_l": -118.79045104980469, "logps_train/policy_1_w": -74.2876968383789, "logps_train/policy_2_2": -79.33920288085938, "logps_train/policy_2_w": -100.00637817382812, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.6372337341308594, "rewards_train/1-l": -1.8952550888061523, "rewards_train/1-w": 2.4516990184783936, "rewards_train/2-2": 2.390298366546631, "rewards_train/2-w": 1.426706314086914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.346954107284546, "rewards_train/margins_1": 0.8144652843475342, "rewards_train/margins_2": 0.9635920524597168, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -129.00588989257812, "logps_train/policy_1_l": -176.88186645507812, "logps_train/policy_1_w": -109.765380859375, "logps_train/policy_2_2": -108.477294921875, "logps_train/policy_2_w": -137.90235900878906, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.633786916732788, "rewards_train/1-l": -1.764162540435791, "rewards_train/1-w": 2.8906493186950684, "rewards_train/2-2": 2.5178956985473633, "rewards_train/2-w": 1.8941389322280884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.654811859130859, "rewards_train/margins_1": 1.2568624019622803, "rewards_train/margins_2": 0.6237567663192749, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -191.26071166992188, "logps_train/policy_1_l": -196.70919799804688, "logps_train/policy_1_w": -118.13652038574219, "logps_train/policy_2_2": -146.90464782714844, "logps_train/policy_2_w": -157.23025512695312, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.7731471061706543, "rewards_train/1-l": -2.485373020172119, "rewards_train/1-w": 4.057051181793213, "rewards_train/2-2": 3.516566276550293, "rewards_train/2-w": 2.5347859859466553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.542424201965332, "rewards_train/margins_1": 2.2839040756225586, "rewards_train/margins_2": 0.9817802906036377, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -194.81466674804688, "logps_train/policy_1_l": -144.1055908203125, "logps_train/policy_1_w": -101.57600402832031, "logps_train/policy_2_2": -160.30722045898438, "logps_train/policy_2_w": -132.09860229492188, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.7341582775115967, "rewards_train/1-l": -1.9046998023986816, "rewards_train/1-w": 3.4033374786376953, "rewards_train/2-2": 3.1263577938079834, "rewards_train/2-w": 2.3698267936706543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.308037281036377, "rewards_train/margins_1": 1.6691792011260986, "rewards_train/margins_2": 0.7565310001373291, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -116.01777648925781, "logps_train/policy_1_l": -132.12088012695312, "logps_train/policy_1_w": -92.06769561767578, "logps_train/policy_2_2": -92.44035339355469, "logps_train/policy_2_w": -130.81021118164062, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.6950979232788086, "rewards_train/1-l": -1.3944129943847656, "rewards_train/1-w": 2.7713546752929688, "rewards_train/2-2": 2.3348705768585205, "rewards_train/2-w": 0.9174172282218933, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.165767669677734, "rewards_train/margins_1": 1.0762567520141602, "rewards_train/margins_2": 1.4174533486366272, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -94.94263458251953, "logps_train/policy_1_l": -141.0420379638672, "logps_train/policy_1_w": -86.78143310546875, "logps_train/policy_2_2": -77.38331604003906, "logps_train/policy_2_w": -115.70864868164062, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.7096428871154785, "rewards_train/1-l": -1.8920952081680298, "rewards_train/1-w": 3.1347475051879883, "rewards_train/2-2": 2.0062005519866943, "rewards_train/2-w": 2.143979072570801, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.026842713356018, "rewards_train/margins_1": 1.4251046180725098, "rewards_train/margins_2": -0.13777852058410645, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -78.66780090332031, "logps_train/policy_1_l": -161.4794921875, "logps_train/policy_1_w": -48.97714614868164, "logps_train/policy_2_2": -67.01960754394531, "logps_train/policy_2_w": -67.3016586303711, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -63.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 1.5035330057144165, "rewards_train/1-l": -2.977051258087158, "rewards_train/1-w": 1.4612696170806885, "rewards_train/2-2": 1.8001880645751953, "rewards_train/2-w": 0.6264750957489014, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.438320875167847, "rewards_train/margins_1": -0.04226338863372803, "rewards_train/margins_2": 1.173712968826294, "step": 362 }, { "epoch": 1.09, "logps_train/policy_1_2": -73.47837829589844, "logps_train/policy_1_l": -89.22660064697266, "logps_train/policy_1_w": -55.27326202392578, "logps_train/policy_2_2": -61.38730239868164, "logps_train/policy_2_w": -74.67842102050781, "logps_train/ref_1_2": -88.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -86.5, "rewards_train/1-2": 1.4657853841781616, "rewards_train/1-l": -1.1221230030059814, "rewards_train/1-w": 1.9112967252731323, "rewards_train/2-2": 1.8311183452606201, "rewards_train/2-w": 1.1863811016082764, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.0334197282791138, "rewards_train/margins_1": 0.4455113410949707, "rewards_train/margins_2": 0.6447372436523438, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -206.0489501953125, "logps_train/policy_1_l": -193.04718017578125, "logps_train/policy_1_w": -127.93064880371094, "logps_train/policy_2_2": -181.01312255859375, "logps_train/policy_2_w": -167.5129852294922, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.87947940826416, "rewards_train/1-l": -1.6043272018432617, "rewards_train/1-w": 4.144434928894043, "rewards_train/2-2": 3.848686695098877, "rewards_train/2-w": 2.858076333999634, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.748762130737305, "rewards_train/margins_1": 1.2649555206298828, "rewards_train/margins_2": 0.9906103610992432, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -143.84674072265625, "logps_train/policy_1_l": -127.84552764892578, "logps_train/policy_1_w": -122.12273406982422, "logps_train/policy_2_2": -109.65081787109375, "logps_train/policy_2_w": -156.1006317138672, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.8840758800506592, "rewards_train/1-l": -2.260333776473999, "rewards_train/1-w": 3.1955387592315674, "rewards_train/2-2": 3.099761486053467, "rewards_train/2-w": 1.749312162399292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.455872535705566, "rewards_train/margins_1": 1.3114628791809082, "rewards_train/margins_2": 1.3504493236541748, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -193.48060607910156, "logps_train/policy_1_l": -154.79774475097656, "logps_train/policy_1_w": -135.15133666992188, "logps_train/policy_2_2": -142.0045166015625, "logps_train/policy_2_w": -185.84866333007812, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.0644400119781494, "rewards_train/1-l": -1.4041885137557983, "rewards_train/1-w": 4.198929786682129, "rewards_train/2-2": 4.030798435211182, "rewards_train/2-w": 2.465134382247925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.603118300437927, "rewards_train/margins_1": 2.1344897747039795, "rewards_train/margins_2": 1.5656640529632568, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -141.19078063964844, "logps_train/policy_1_l": -143.82476806640625, "logps_train/policy_1_w": -91.86859893798828, "logps_train/policy_2_2": -105.98880004882812, "logps_train/policy_2_w": -112.0348129272461, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.4121720790863037, "rewards_train/1-l": -2.492633104324341, "rewards_train/1-w": 2.87095308303833, "rewards_train/2-2": 2.432370662689209, "rewards_train/2-w": 2.0840187072753906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.363586187362671, "rewards_train/margins_1": 1.4587810039520264, "rewards_train/margins_2": 0.34835195541381836, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -154.9145965576172, "logps_train/policy_1_l": -196.74000549316406, "logps_train/policy_1_w": -120.39019012451172, "logps_train/policy_2_2": -126.28099822998047, "logps_train/policy_2_w": -148.93661499023438, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.6124463081359863, "rewards_train/1-l": -2.2689223289489746, "rewards_train/1-w": 3.1609811782836914, "rewards_train/2-2": 2.8695569038391113, "rewards_train/2-w": 1.9750884771347046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.429903507232666, "rewards_train/margins_1": 1.548534870147705, "rewards_train/margins_2": 0.8944684267044067, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -193.7285919189453, "logps_train/policy_1_l": -202.40692138671875, "logps_train/policy_1_w": -128.36233520507812, "logps_train/policy_2_2": -150.69696044921875, "logps_train/policy_2_w": -164.9099884033203, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.145890951156616, "rewards_train/1-l": -2.2024097442626953, "rewards_train/1-w": 3.4340779781341553, "rewards_train/2-2": 3.818584442138672, "rewards_train/2-w": 2.3058760166168213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.636487722396851, "rewards_train/margins_1": 1.288187026977539, "rewards_train/margins_2": 1.5127084255218506, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -176.6705780029297, "logps_train/policy_1_l": -158.8946533203125, "logps_train/policy_1_w": -155.0594482421875, "logps_train/policy_2_2": -139.3943328857422, "logps_train/policy_2_w": -199.65618896484375, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 1.2384109497070312, "rewards_train/1-l": -1.4683705568313599, "rewards_train/1-w": 3.7479615211486816, "rewards_train/2-2": 2.8574414253234863, "rewards_train/2-w": 2.1179754734039307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.2163320779800415, "rewards_train/margins_1": 2.5095505714416504, "rewards_train/margins_2": 0.7394659519195557, "step": 363 }, { "epoch": 1.09, "learning_rate": 2.3518984302657146e-06, "loss": 0.4625, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -70.4563217163086, "logps_train/policy_1_l": -80.76908111572266, "logps_train/policy_1_w": -68.22854614257812, "logps_train/policy_2_2": -43.50849151611328, "logps_train/policy_2_w": -103.54792785644531, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -60.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.7778052091598511, "rewards_train/1-l": -0.8262242078781128, "rewards_train/1-w": 2.16796612739563, "rewards_train/2-2": 1.6977837085723877, "rewards_train/2-w": 0.5456950664520264, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9941903352737427, "rewards_train/margins_1": 1.3901609182357788, "rewards_train/margins_2": 1.1520886421203613, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -169.2476806640625, "logps_train/policy_1_l": -144.91610717773438, "logps_train/policy_1_w": -127.86430358886719, "logps_train/policy_2_2": -137.66177368164062, "logps_train/policy_2_w": -174.59701538085938, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.961169958114624, "rewards_train/1-l": -2.053719997406006, "rewards_train/1-w": 3.4385697841644287, "rewards_train/2-2": 3.4041361808776855, "rewards_train/2-w": 1.6777982711791992, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.492289781570435, "rewards_train/margins_1": 1.4773998260498047, "rewards_train/margins_2": 1.7263379096984863, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -122.47274017333984, "logps_train/policy_1_l": -125.87452697753906, "logps_train/policy_1_w": -78.59567260742188, "logps_train/policy_2_2": -85.0032958984375, "logps_train/policy_2_w": -114.75619506835938, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.7416909337043762, "rewards_train/1-l": -2.348292827606201, "rewards_train/1-w": 3.4544949531555176, "rewards_train/2-2": 2.121642827987671, "rewards_train/2-w": 1.9728178977966309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.802787780761719, "rewards_train/margins_1": 2.7128040194511414, "rewards_train/margins_2": 0.14882493019104004, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -126.93252563476562, "logps_train/policy_1_l": -183.22198486328125, "logps_train/policy_1_w": -97.90373229980469, "logps_train/policy_2_2": -100.88568115234375, "logps_train/policy_2_w": -153.1068572998047, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.3442473411560059, "rewards_train/1-l": -2.644365072250366, "rewards_train/1-w": 2.9549400806427, "rewards_train/2-2": 2.1446595191955566, "rewards_train/2-w": 0.5865805149078369, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.599305152893066, "rewards_train/margins_1": 1.6106927394866943, "rewards_train/margins_2": 1.5580790042877197, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -158.80599975585938, "logps_train/policy_1_l": -118.32284545898438, "logps_train/policy_1_w": -95.20417022705078, "logps_train/policy_2_2": -118.71160125732422, "logps_train/policy_2_w": -135.4831085205078, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7362949848175049, "rewards_train/1-l": -1.6608002185821533, "rewards_train/1-w": 2.9010679721832275, "rewards_train/2-2": 2.5740549564361572, "rewards_train/2-w": 1.5512988567352295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.561868190765381, "rewards_train/margins_1": 2.1647729873657227, "rewards_train/margins_2": 1.0227560997009277, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -95.19112396240234, "logps_train/policy_1_l": -110.81404876708984, "logps_train/policy_1_w": -67.27865600585938, "logps_train/policy_2_2": -70.9046859741211, "logps_train/policy_2_w": -84.96878051757812, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": 1.2512001991271973, "rewards_train/1-l": -1.5743736028671265, "rewards_train/1-w": 1.633364200592041, "rewards_train/2-2": 2.3009378910064697, "rewards_train/2-w": 0.9527307748794556, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2077378034591675, "rewards_train/margins_1": 0.38216400146484375, "rewards_train/margins_2": 1.3482071161270142, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -178.61318969726562, "logps_train/policy_1_l": -80.42750549316406, "logps_train/policy_1_w": -95.83558654785156, "logps_train/policy_2_2": -128.80776977539062, "logps_train/policy_2_w": -140.38706970214844, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -66.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.7996195554733276, "rewards_train/1-l": -1.4212664365768433, "rewards_train/1-w": 3.619565963745117, "rewards_train/2-2": 3.5590662956237793, "rewards_train/2-w": 2.0237936973571777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0408324003219604, "rewards_train/margins_1": 1.8199464082717896, "rewards_train/margins_2": 1.5352725982666016, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -100.85452270507812, "logps_train/policy_1_l": -175.656494140625, "logps_train/policy_1_w": -84.70353698730469, "logps_train/policy_2_2": -76.97688293457031, "logps_train/policy_2_w": -118.07911682128906, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.6723606586456299, "rewards_train/1-l": -3.238306760787964, "rewards_train/1-w": 2.341365337371826, "rewards_train/2-2": 2.513248920440674, "rewards_train/2-w": 0.9803692102432251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.57967209815979, "rewards_train/margins_1": 0.6690046787261963, "rewards_train/margins_2": 1.5328797101974487, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -166.76937866210938, "logps_train/policy_1_l": -217.23135375976562, "logps_train/policy_1_w": -138.8717803955078, "logps_train/policy_2_2": -131.49130249023438, "logps_train/policy_2_w": -179.14669799804688, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.1605629920959473, "rewards_train/1-l": -2.461416482925415, "rewards_train/1-w": 4.337821960449219, "rewards_train/2-2": 3.5852441787719727, "rewards_train/2-w": 3.0478291511535645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.799238443374634, "rewards_train/margins_1": 2.1772589683532715, "rewards_train/margins_2": 0.5374150276184082, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -237.869873046875, "logps_train/policy_1_l": -233.09730529785156, "logps_train/policy_1_w": -160.46188354492188, "logps_train/policy_2_2": -184.3814697265625, "logps_train/policy_2_w": -223.08047485351562, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 0.9286378622055054, "rewards_train/1-l": -2.601137161254883, "rewards_train/1-w": 4.924124717712402, "rewards_train/2-2": 3.183727741241455, "rewards_train/2-w": 2.323201894760132, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.525261878967285, "rewards_train/margins_1": 3.995486855506897, "rewards_train/margins_2": 0.8605258464813232, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -231.68728637695312, "logps_train/policy_1_l": -157.83245849609375, "logps_train/policy_1_w": -120.25074768066406, "logps_train/policy_2_2": -173.04849243164062, "logps_train/policy_2_w": -174.4654541015625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.8468955159187317, "rewards_train/1-l": -1.9098076820373535, "rewards_train/1-w": 3.1280500888824463, "rewards_train/2-2": 3.163900375366211, "rewards_train/2-w": 1.1347050666809082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0378577709198, "rewards_train/margins_1": 2.2811545729637146, "rewards_train/margins_2": 2.0291953086853027, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -97.8941879272461, "logps_train/policy_1_l": -127.80917358398438, "logps_train/policy_1_w": -92.5379638671875, "logps_train/policy_2_2": -73.68782043457031, "logps_train/policy_2_w": -124.6702880859375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.3668314218521118, "rewards_train/1-l": -1.4566986560821533, "rewards_train/1-w": 3.0633904933929443, "rewards_train/2-2": 2.054265022277832, "rewards_train/2-w": 1.4767212867736816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.520089149475098, "rewards_train/margins_1": 1.6965590715408325, "rewards_train/margins_2": 0.5775437355041504, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -181.28866577148438, "logps_train/policy_1_l": -158.3959503173828, "logps_train/policy_1_w": -69.35795593261719, "logps_train/policy_2_2": -125.57884216308594, "logps_train/policy_2_w": -93.7587890625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.319962739944458, "rewards_train/1-l": -2.353656530380249, "rewards_train/1-w": 2.2392044067382812, "rewards_train/2-2": 2.8749284744262695, "rewards_train/2-w": 1.3514655828475952, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.59286093711853, "rewards_train/margins_1": 0.9192416667938232, "rewards_train/margins_2": 1.5234628915786743, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -189.62652587890625, "logps_train/policy_1_l": -230.19400024414062, "logps_train/policy_1_w": -148.54393005371094, "logps_train/policy_2_2": -160.05352783203125, "logps_train/policy_2_w": -190.47613525390625, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.138908863067627, "rewards_train/1-l": -3.2676403522491455, "rewards_train/1-w": 3.819826126098633, "rewards_train/2-2": 3.0899596214294434, "rewards_train/2-w": 2.7992618083953857, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.087466478347778, "rewards_train/margins_1": 1.6809172630310059, "rewards_train/margins_2": 0.2906978130340576, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -32.750579833984375, "logps_train/policy_1_l": -55.18754577636719, "logps_train/policy_1_w": -62.9020881652832, "logps_train/policy_2_2": -22.266143798828125, "logps_train/policy_2_w": -82.98191833496094, "logps_train/ref_1_2": -38.5, "logps_train/ref_1_l": -43.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -31.625, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.5944730043411255, "rewards_train/1-l": -1.1710983514785767, "rewards_train/1-w": 1.706666111946106, "rewards_train/2-2": 0.9397920370101929, "rewards_train/2-w": 0.8775891065597534, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.8777644634246826, "rewards_train/margins_1": 1.1121931076049805, "rewards_train/margins_2": 0.06220293045043945, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -161.7931671142578, "logps_train/policy_1_l": -168.53121948242188, "logps_train/policy_1_w": -112.54365539550781, "logps_train/policy_2_2": -122.83900451660156, "logps_train/policy_2_w": -158.5989990234375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 2.3884568214416504, "rewards_train/1-l": -1.715428352355957, "rewards_train/1-w": 3.6706347465515137, "rewards_train/2-2": 3.961704730987549, "rewards_train/2-w": 1.574474573135376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.386063098907471, "rewards_train/margins_1": 1.2821779251098633, "rewards_train/margins_2": 2.387230157852173, "step": 365 }, { "epoch": 1.1, "learning_rate": 2.3272513794928055e-06, "loss": 0.4982, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -216.99392700195312, "logps_train/policy_1_l": -265.44403076171875, "logps_train/policy_1_w": -170.17056274414062, "logps_train/policy_2_2": -161.2299041748047, "logps_train/policy_2_w": -228.9617919921875, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.9662328958511353, "rewards_train/1-l": -2.8290717601776123, "rewards_train/1-w": 3.551693916320801, "rewards_train/2-2": 3.7301344871520996, "rewards_train/2-w": 1.7694463729858398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.380765676498413, "rewards_train/margins_1": 1.5854610204696655, "rewards_train/margins_2": 1.9606881141662598, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -74.97677612304688, "logps_train/policy_1_l": -62.46907043457031, "logps_train/policy_1_w": -91.44097900390625, "logps_train/policy_2_2": -52.45862579345703, "logps_train/policy_2_w": -134.25836181640625, "logps_train/ref_1_2": -84.5, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9507601857185364, "rewards_train/1-l": -1.187605381011963, "rewards_train/1-w": 1.7725040912628174, "rewards_train/2-2": 1.4510126113891602, "rewards_train/2-w": 0.35463157296180725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9601094722747803, "rewards_train/margins_1": 0.821743905544281, "rewards_train/margins_2": 1.096381038427353, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -122.96531677246094, "logps_train/policy_1_l": -122.01354217529297, "logps_train/policy_1_w": -80.48517608642578, "logps_train/policy_2_2": -98.56552124023438, "logps_train/policy_2_w": -104.64389038085938, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.555030345916748, "rewards_train/1-l": -2.2029166221618652, "rewards_train/1-w": 2.351482629776001, "rewards_train/2-2": 2.5575103759765625, "rewards_train/2-w": 1.2090485095977783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.554399251937866, "rewards_train/margins_1": 0.7964522838592529, "rewards_train/margins_2": 1.3484618663787842, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -110.50724792480469, "logps_train/policy_1_l": -136.57022094726562, "logps_train/policy_1_w": -96.15498352050781, "logps_train/policy_2_2": -86.16899871826172, "logps_train/policy_2_w": -126.25104522705078, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.2336500883102417, "rewards_train/1-l": -1.7451090812683105, "rewards_train/1-w": 2.3126273155212402, "rewards_train/2-2": 2.173725128173828, "rewards_train/2-w": 1.1553643941879272, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.057736396789551, "rewards_train/margins_1": 1.0789772272109985, "rewards_train/margins_2": 1.0183607339859009, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -212.14793395996094, "logps_train/policy_1_l": -238.81312561035156, "logps_train/policy_1_w": -177.39520263671875, "logps_train/policy_2_2": -161.78652954101562, "logps_train/policy_2_w": -249.65628051757812, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 1.9383304119110107, "rewards_train/1-l": -2.4836578369140625, "rewards_train/1-w": 4.313605785369873, "rewards_train/2-2": 3.774470806121826, "rewards_train/2-w": 1.6921844482421875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.7972636222839355, "rewards_train/margins_1": 2.3752753734588623, "rewards_train/margins_2": 2.0822863578796387, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -167.94863891601562, "logps_train/policy_1_l": -125.65054321289062, "logps_train/policy_1_w": -108.5016860961914, "logps_train/policy_2_2": -142.5388641357422, "logps_train/policy_2_w": -136.58177185058594, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.0850188732147217, "rewards_train/1-l": -1.5814603567123413, "rewards_train/1-w": 3.764674186706543, "rewards_train/2-2": 2.404024600982666, "rewards_train/2-w": 2.4293227195739746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.346134543418884, "rewards_train/margins_1": 2.6796553134918213, "rewards_train/margins_2": -0.025298118591308594, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -147.58807373046875, "logps_train/policy_1_l": -181.98965454101562, "logps_train/policy_1_w": -128.9561004638672, "logps_train/policy_2_2": -125.68525695800781, "logps_train/policy_2_w": -157.19125366210938, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.3130688667297363, "rewards_train/1-l": -2.3397860527038574, "rewards_train/1-w": 3.2192347049713135, "rewards_train/2-2": 3.0041306018829346, "rewards_train/2-w": 2.132437229156494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.559020757675171, "rewards_train/margins_1": 0.9061658382415771, "rewards_train/margins_2": 0.8716933727264404, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -156.44094848632812, "logps_train/policy_1_l": -141.35577392578125, "logps_train/policy_1_w": -113.98583221435547, "logps_train/policy_2_2": -116.63137817382812, "logps_train/policy_2_w": -161.59613037109375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.396529197692871, "rewards_train/1-l": -1.7574512958526611, "rewards_train/1-w": 3.034229040145874, "rewards_train/2-2": 2.766549587249756, "rewards_train/2-w": 1.4638245105743408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.791680335998535, "rewards_train/margins_1": 1.637699842453003, "rewards_train/margins_2": 1.302725076675415, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -114.02232360839844, "logps_train/policy_1_l": -144.03594970703125, "logps_train/policy_1_w": -84.13478088378906, "logps_train/policy_2_2": -85.69001007080078, "logps_train/policy_2_w": -116.13645935058594, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.9235490560531616, "rewards_train/1-l": -2.0248851776123047, "rewards_train/1-w": 2.8037095069885254, "rewards_train/2-2": 2.7559995651245117, "rewards_train/2-w": 1.2965095043182373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.82859468460083, "rewards_train/margins_1": 0.8801604509353638, "rewards_train/margins_2": 1.4594900608062744, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -82.84883117675781, "logps_train/policy_1_l": -132.75961303710938, "logps_train/policy_1_w": -80.33100891113281, "logps_train/policy_2_2": -54.29022979736328, "logps_train/policy_2_w": -109.6060562133789, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.9869914054870605, "rewards_train/1-l": -1.659556269645691, "rewards_train/1-w": 3.1114304065704346, "rewards_train/2-2": 1.9037895202636719, "rewards_train/2-w": 2.0081443786621094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7709866762161255, "rewards_train/margins_1": 2.124439001083374, "rewards_train/margins_2": -0.1043548583984375, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -146.982177734375, "logps_train/policy_1_l": -240.0030517578125, "logps_train/policy_1_w": -157.7114715576172, "logps_train/policy_2_2": -118.25198364257812, "logps_train/policy_2_w": -196.93862915039062, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 2.5861573219299316, "rewards_train/1-l": -3.0577261447906494, "rewards_train/1-w": 3.7710394859313965, "rewards_train/2-2": 3.547457218170166, "rewards_train/2-w": 1.804573893547058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.828765630722046, "rewards_train/margins_1": 1.1848821640014648, "rewards_train/margins_2": 1.742883324623108, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -73.02001190185547, "logps_train/policy_1_l": -120.98674011230469, "logps_train/policy_1_w": -75.13665008544922, "logps_train/policy_2_2": -53.44101333618164, "logps_train/policy_2_w": -97.35153198242188, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 1.2878427505493164, "rewards_train/1-l": -1.6486740112304688, "rewards_train/1-w": 2.9136781692504883, "rewards_train/2-2": 2.0797266960144043, "rewards_train/2-w": 1.7882840633392334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.562352180480957, "rewards_train/margins_1": 1.6258354187011719, "rewards_train/margins_2": 0.2914426326751709, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -137.84779357910156, "logps_train/policy_1_l": -75.43025207519531, "logps_train/policy_1_w": -80.88993072509766, "logps_train/policy_2_2": -113.37154388427734, "logps_train/policy_2_w": -103.53544616699219, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 2.1449079513549805, "rewards_train/1-l": -0.7897045612335205, "rewards_train/1-w": 2.5325160026550293, "rewards_train/2-2": 2.8925328254699707, "rewards_train/2-w": 1.6691608428955078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.32222056388855, "rewards_train/margins_1": 0.38760805130004883, "rewards_train/margins_2": 1.223371982574463, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -106.29147338867188, "logps_train/policy_1_l": -134.34124755859375, "logps_train/policy_1_w": -101.76444244384766, "logps_train/policy_2_2": -77.80585479736328, "logps_train/policy_2_w": -141.9894561767578, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.7763214111328125, "rewards_train/1-l": -1.2821722030639648, "rewards_train/1-w": 2.5852742195129395, "rewards_train/2-2": 1.7766412496566772, "rewards_train/2-w": 1.3057423830032349, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8674464225769043, "rewards_train/margins_1": 1.808952808380127, "rewards_train/margins_2": 0.4708988666534424, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -92.52279663085938, "logps_train/policy_1_l": -131.11404418945312, "logps_train/policy_1_w": -115.11754608154297, "logps_train/policy_2_2": -71.30473327636719, "logps_train/policy_2_w": -136.02110290527344, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.5274076461791992, "rewards_train/1-l": -1.3278101682662964, "rewards_train/1-w": 2.7523083686828613, "rewards_train/2-2": 2.278902292251587, "rewards_train/2-w": 1.85570228099823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.080118536949158, "rewards_train/margins_1": 1.224900722503662, "rewards_train/margins_2": 0.42320001125335693, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -136.723388671875, "logps_train/policy_1_l": -205.3319854736328, "logps_train/policy_1_w": -97.43657684326172, "logps_train/policy_2_2": -105.70639038085938, "logps_train/policy_2_w": -131.2877655029297, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.9077380895614624, "rewards_train/1-l": -2.1987740993499756, "rewards_train/1-w": 2.5428662300109863, "rewards_train/2-2": 3.0058505535125732, "rewards_train/2-w": 1.4720046520233154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.741640329360962, "rewards_train/margins_1": 0.6351281404495239, "rewards_train/margins_2": 1.5338459014892578, "step": 367 }, { "epoch": 1.1, "learning_rate": 2.3026211886807205e-06, "loss": 0.5024, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -189.4320068359375, "logps_train/policy_1_l": -191.41424560546875, "logps_train/policy_1_w": -156.001220703125, "logps_train/policy_2_2": -142.9804229736328, "logps_train/policy_2_w": -200.03143310546875, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 3.1466431617736816, "rewards_train/1-l": -2.5047054290771484, "rewards_train/1-w": 3.604565143585205, "rewards_train/2-2": 4.845708847045898, "rewards_train/2-w": 1.7656059265136719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.1092705726623535, "rewards_train/margins_1": 0.45792198181152344, "rewards_train/margins_2": 3.0801029205322266, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -166.08432006835938, "logps_train/policy_1_l": -151.03277587890625, "logps_train/policy_1_w": -155.5604248046875, "logps_train/policy_2_2": -134.63720703125, "logps_train/policy_2_w": -204.22848510742188, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.2884421348571777, "rewards_train/1-l": -2.0649960041046143, "rewards_train/1-w": 3.5783333778381348, "rewards_train/2-2": 3.645653247833252, "rewards_train/2-w": 1.4365265369415283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.643329381942749, "rewards_train/margins_1": 1.289891242980957, "rewards_train/margins_2": 2.2091267108917236, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -116.86019897460938, "logps_train/policy_1_l": -123.47206115722656, "logps_train/policy_1_w": -156.32061767578125, "logps_train/policy_2_2": -95.36553955078125, "logps_train/policy_2_w": -180.68658447265625, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.1517724990844727, "rewards_train/1-l": -1.8544328212738037, "rewards_train/1-w": 2.26637601852417, "rewards_train/2-2": 3.0230164527893066, "rewards_train/2-w": 1.2985281944274902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.120808839797974, "rewards_train/margins_1": 0.11460351943969727, "rewards_train/margins_2": 1.7244882583618164, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -116.62406158447266, "logps_train/policy_1_l": -159.96087646484375, "logps_train/policy_1_w": -108.79295349121094, "logps_train/policy_2_2": -90.29801940917969, "logps_train/policy_2_w": -141.37258911132812, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.08310158550739288, "rewards_train/1-l": -2.0812432765960693, "rewards_train/1-w": 2.4221696853637695, "rewards_train/2-2": 1.4551589488983154, "rewards_train/2-w": 0.9519991874694824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.503412961959839, "rewards_train/margins_1": 2.3390680998563766, "rewards_train/margins_2": 0.503159761428833, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -127.4726333618164, "logps_train/policy_1_l": -75.10127258300781, "logps_train/policy_1_w": -56.916561126708984, "logps_train/policy_2_2": -76.52442169189453, "logps_train/policy_2_w": -100.28268432617188, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -59.75, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.3402365446090698, "rewards_train/1-l": -1.544503092765808, "rewards_train/1-w": 2.911468744277954, "rewards_train/2-2": 3.2944326400756836, "rewards_train/2-w": 1.2498573064804077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.455971837043762, "rewards_train/margins_1": 1.5712321996688843, "rewards_train/margins_2": 2.044575333595276, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -219.28651428222656, "logps_train/policy_1_l": -208.89862060546875, "logps_train/policy_1_w": -184.29461669921875, "logps_train/policy_2_2": -170.06341552734375, "logps_train/policy_2_w": -242.8449249267578, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.3432233333587646, "rewards_train/1-l": -2.334880828857422, "rewards_train/1-w": 4.059601306915283, "rewards_train/2-2": 2.8967840671539307, "rewards_train/2-w": 1.3514436483383179, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.394482135772705, "rewards_train/margins_1": 2.7163779735565186, "rewards_train/margins_2": 1.5453404188156128, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -58.65354919433594, "logps_train/policy_1_l": -33.00275421142578, "logps_train/policy_1_w": -51.38799285888672, "logps_train/policy_2_2": -44.41117477416992, "logps_train/policy_2_w": -70.02379608154297, "logps_train/ref_1_2": -66.0, "logps_train/ref_1_l": -26.875, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -58.0, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.7308365106582642, "rewards_train/1-l": -0.6095528602600098, "rewards_train/1-w": 1.5686225891113281, "rewards_train/2-2": 1.3663045167922974, "rewards_train/2-w": 0.8202767968177795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.178175449371338, "rewards_train/margins_1": 0.837786078453064, "rewards_train/margins_2": 0.5460277199745178, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -203.1417236328125, "logps_train/policy_1_l": -293.29296875, "logps_train/policy_1_w": -254.80953979492188, "logps_train/policy_2_2": -167.0943603515625, "logps_train/policy_2_w": -320.1546630859375, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -306.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -344.0, "rewards_train/1-2": 2.9483284950256348, "rewards_train/1-l": -2.329293966293335, "rewards_train/1-w": 5.206544876098633, "rewards_train/2-2": 4.059314727783203, "rewards_train/2-w": 2.490781784057617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.535838842391968, "rewards_train/margins_1": 2.258216381072998, "rewards_train/margins_2": 1.568532943725586, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -206.09844970703125, "logps_train/policy_1_l": -237.13351440429688, "logps_train/policy_1_w": -144.24734497070312, "logps_train/policy_2_2": -169.4873046875, "logps_train/policy_2_w": -185.69158935546875, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 3.2667176723480225, "rewards_train/1-l": -2.939621925354004, "rewards_train/1-w": 4.537764549255371, "rewards_train/2-2": 4.529393196105957, "rewards_train/2-w": 3.1808419227600098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.477386474609375, "rewards_train/margins_1": 1.2710468769073486, "rewards_train/margins_2": 1.3485512733459473, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -21.69770050048828, "logps_train/policy_1_l": -35.590415954589844, "logps_train/policy_1_w": -23.81089973449707, "logps_train/policy_2_2": -15.132570266723633, "logps_train/policy_2_w": -27.893810272216797, "logps_train/ref_1_2": -29.0, "logps_train/ref_1_l": -26.75, "logps_train/ref_1_w": -34.25, "logps_train/ref_2_2": -24.625, "logps_train/ref_2_w": -35.5, "rewards_train/1-2": 0.7239800691604614, "rewards_train/1-l": -0.8825278282165527, "rewards_train/1-w": 1.0345350503921509, "rewards_train/2-2": 0.9494383335113525, "rewards_train/2-w": 0.7697986364364624, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.9170628786087036, "rewards_train/margins_1": 0.31055498123168945, "rewards_train/margins_2": 0.17963969707489014, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -214.36288452148438, "logps_train/policy_1_l": -229.64178466796875, "logps_train/policy_1_w": -131.9420166015625, "logps_train/policy_2_2": -160.07708740234375, "logps_train/policy_2_w": -180.90267944335938, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.8473069667816162, "rewards_train/1-l": -2.3032407760620117, "rewards_train/1-w": 3.5932986736297607, "rewards_train/2-2": 4.1422905921936035, "rewards_train/2-w": 1.9581702947616577, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8965394496917725, "rewards_train/margins_1": 1.7459917068481445, "rewards_train/margins_2": 2.184120297431946, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -160.2705535888672, "logps_train/policy_1_l": -150.87057495117188, "logps_train/policy_1_w": -116.88835144042969, "logps_train/policy_2_2": -118.64385223388672, "logps_train/policy_2_w": -165.3685760498047, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.7854450941085815, "rewards_train/1-l": -1.120260238647461, "rewards_train/1-w": 3.4517898559570312, "rewards_train/2-2": 3.185614585876465, "rewards_train/2-w": 1.6725165843963623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.572050094604492, "rewards_train/margins_1": 1.6663447618484497, "rewards_train/margins_2": 1.5130980014801025, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -132.2508544921875, "logps_train/policy_1_l": -182.67813110351562, "logps_train/policy_1_w": -130.40377807617188, "logps_train/policy_2_2": -104.39460754394531, "logps_train/policy_2_w": -182.13772583007812, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.5796023607254028, "rewards_train/1-l": -1.943399429321289, "rewards_train/1-w": 4.5760297775268555, "rewards_train/2-2": 2.805851936340332, "rewards_train/2-w": 2.403416395187378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.5194292068481445, "rewards_train/margins_1": 2.9964274168014526, "rewards_train/margins_2": 0.4024355411529541, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -159.39541625976562, "logps_train/policy_1_l": -194.08595275878906, "logps_train/policy_1_w": -143.40138244628906, "logps_train/policy_2_2": -111.49494934082031, "logps_train/policy_2_w": -212.2899627685547, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.4338971376419067, "rewards_train/1-l": -2.346877336502075, "rewards_train/1-w": 3.0926740169525146, "rewards_train/2-2": 2.873162031173706, "rewards_train/2-w": 1.3057701587677002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.43955135345459, "rewards_train/margins_1": 1.658776879310608, "rewards_train/margins_2": 1.5673918724060059, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -78.83108520507812, "logps_train/policy_1_l": -133.74569702148438, "logps_train/policy_1_w": -58.51924133300781, "logps_train/policy_2_2": -57.68143844604492, "logps_train/policy_2_w": -75.70127868652344, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -91.0, "rewards_train/1-2": 0.8481411933898926, "rewards_train/1-l": -2.365975856781006, "rewards_train/1-w": 2.1884567737579346, "rewards_train/2-2": 1.7021689414978027, "rewards_train/2-w": 1.5251851081848145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.55443263053894, "rewards_train/margins_1": 1.340315580368042, "rewards_train/margins_2": 0.17698383331298828, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -126.74130249023438, "logps_train/policy_1_l": -168.23110961914062, "logps_train/policy_1_w": -93.61836242675781, "logps_train/policy_2_2": -102.57009887695312, "logps_train/policy_2_w": -113.40917205810547, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.9774322509765625, "rewards_train/1-l": -1.9860000610351562, "rewards_train/1-w": 2.011601448059082, "rewards_train/2-2": 2.4125211238861084, "rewards_train/2-w": 1.2481452226638794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9976015090942383, "rewards_train/margins_1": 0.03416919708251953, "rewards_train/margins_2": 1.164375901222229, "step": 369 }, { "epoch": 1.11, "learning_rate": 2.278010261692663e-06, "loss": 0.6313, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -141.4788055419922, "logps_train/policy_1_l": -145.15228271484375, "logps_train/policy_1_w": -143.01524353027344, "logps_train/policy_2_2": -112.41858673095703, "logps_train/policy_2_w": -186.44302368164062, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.2677443027496338, "rewards_train/1-l": -2.1140565872192383, "rewards_train/1-w": 3.6437878608703613, "rewards_train/2-2": 2.462829351425171, "rewards_train/2-w": 1.9244481325149536, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.7578444480896, "rewards_train/margins_1": 2.3760435581207275, "rewards_train/margins_2": 0.5383812189102173, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -184.43478393554688, "logps_train/policy_1_l": -157.02285766601562, "logps_train/policy_1_w": -182.5465087890625, "logps_train/policy_2_2": -144.64077758789062, "logps_train/policy_2_w": -229.4380340576172, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 1.58933424949646, "rewards_train/1-l": -1.1046292781829834, "rewards_train/1-w": 4.443787574768066, "rewards_train/2-2": 2.87029767036438, "rewards_train/2-w": 2.553070545196533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.54841685295105, "rewards_train/margins_1": 2.8544533252716064, "rewards_train/margins_2": 0.3172271251678467, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -166.80392456054688, "logps_train/policy_1_l": -170.44732666015625, "logps_train/policy_1_w": -153.51210021972656, "logps_train/policy_2_2": -140.27333068847656, "logps_train/policy_2_w": -186.76614379882812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.9378700256347656, "rewards_train/1-l": -1.9404361248016357, "rewards_train/1-w": 3.8529887199401855, "rewards_train/2-2": 3.166417121887207, "rewards_train/2-w": 2.651510238647461, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.793424844741821, "rewards_train/margins_1": 1.91511869430542, "rewards_train/margins_2": 0.5149068832397461, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -100.05455017089844, "logps_train/policy_1_l": -130.57713317871094, "logps_train/policy_1_w": -90.99816131591797, "logps_train/policy_2_2": -81.25895690917969, "logps_train/policy_2_w": -121.11632537841797, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 2.313295841217041, "rewards_train/1-l": -1.1411120891571045, "rewards_train/1-w": 2.9220590591430664, "rewards_train/2-2": 2.9756665229797363, "rewards_train/2-w": 1.6703991889953613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.063171148300171, "rewards_train/margins_1": 0.6087632179260254, "rewards_train/margins_2": 1.305267333984375, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -84.81562805175781, "logps_train/policy_1_l": -173.3453369140625, "logps_train/policy_1_w": -85.04554748535156, "logps_train/policy_2_2": -70.29219055175781, "logps_train/policy_2_w": -122.34825897216797, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.662968635559082, "rewards_train/1-l": -2.878624439239502, "rewards_train/1-w": 2.8751330375671387, "rewards_train/2-2": 2.0606250762939453, "rewards_train/2-w": 1.3507213592529297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.753757476806641, "rewards_train/margins_1": 1.2121644020080566, "rewards_train/margins_2": 0.7099037170410156, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -213.96148681640625, "logps_train/policy_1_l": -213.75823974609375, "logps_train/policy_1_w": -134.0729217529297, "logps_train/policy_2_2": -176.7832794189453, "logps_train/policy_2_w": -166.5137939453125, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.103851318359375, "rewards_train/1-l": -2.6144962310791016, "rewards_train/1-w": 3.6770827770233154, "rewards_train/2-2": 3.8497962951660156, "rewards_train/2-w": 2.626746416091919, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.291579008102417, "rewards_train/margins_1": 1.5732314586639404, "rewards_train/margins_2": 1.2230498790740967, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -197.75209045410156, "logps_train/policy_1_l": -141.20115661621094, "logps_train/policy_1_w": -151.21853637695312, "logps_train/policy_2_2": -142.34402465820312, "logps_train/policy_2_w": -212.46298217773438, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.344322681427002, "rewards_train/1-l": -1.6959941387176514, "rewards_train/1-w": 3.307833671569824, "rewards_train/2-2": 3.5859110355377197, "rewards_train/2-w": 1.306827425956726, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.003827810287476, "rewards_train/margins_1": 1.9635109901428223, "rewards_train/margins_2": 2.2790836095809937, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -156.70188903808594, "logps_train/policy_1_l": -155.49478149414062, "logps_train/policy_1_w": -143.06985473632812, "logps_train/policy_2_2": -112.83490753173828, "logps_train/policy_2_w": -201.37783813476562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.7282487154006958, "rewards_train/1-l": -2.0158841609954834, "rewards_train/1-w": 3.7555150985717773, "rewards_train/2-2": 3.085259437561035, "rewards_train/2-w": 1.5215911865234375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.771399259567261, "rewards_train/margins_1": 2.0272663831710815, "rewards_train/margins_2": 1.5636682510375977, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -274.16009521484375, "logps_train/policy_1_l": -231.5293731689453, "logps_train/policy_1_w": -187.004638671875, "logps_train/policy_2_2": -234.08465576171875, "logps_train/policy_2_w": -238.63601684570312, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -270.0, "logps_train/ref_2_w": -270.0, "rewards_train/1-2": 2.2402403354644775, "rewards_train/1-l": -2.2107503414154053, "rewards_train/1-w": 5.143287181854248, "rewards_train/2-2": 3.6602845191955566, "rewards_train/2-w": 3.0801491737365723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.354037523269653, "rewards_train/margins_1": 2.9030468463897705, "rewards_train/margins_2": 0.5801353454589844, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -157.85704040527344, "logps_train/policy_1_l": -173.85025024414062, "logps_train/policy_1_w": -164.86538696289062, "logps_train/policy_2_2": -120.88145446777344, "logps_train/policy_2_w": -210.76498413085938, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.2924208641052246, "rewards_train/1-l": -2.0375630855560303, "rewards_train/1-w": 2.596078872680664, "rewards_train/2-2": 3.5134174823760986, "rewards_train/2-w": 0.8059734106063843, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.633641958236694, "rewards_train/margins_1": 0.30365800857543945, "rewards_train/margins_2": 2.7074440717697144, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -167.6741943359375, "logps_train/policy_1_l": -172.7127227783203, "logps_train/policy_1_w": -86.9198226928711, "logps_train/policy_2_2": -133.84286499023438, "logps_train/policy_2_w": -111.58975219726562, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.3978153467178345, "rewards_train/1-l": -2.689436435699463, "rewards_train/1-w": 2.4591898918151855, "rewards_train/2-2": 2.812197685241699, "rewards_train/2-w": 1.651962399482727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.148626327514648, "rewards_train/margins_1": 1.061374545097351, "rewards_train/margins_2": 1.1602352857589722, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -108.50442504882812, "logps_train/policy_1_l": -146.42782592773438, "logps_train/policy_1_w": -141.3045654296875, "logps_train/policy_2_2": -80.23597717285156, "logps_train/policy_2_w": -168.81471252441406, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.2761201858520508, "rewards_train/1-l": -1.9062588214874268, "rewards_train/1-w": 2.4248180389404297, "rewards_train/2-2": 2.3002305030822754, "rewards_train/2-w": 1.0792707204818726, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3310768604278564, "rewards_train/margins_1": 1.148697853088379, "rewards_train/margins_2": 1.2209597826004028, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -162.68341064453125, "logps_train/policy_1_l": -190.77513122558594, "logps_train/policy_1_w": -115.36457824707031, "logps_train/policy_2_2": -115.71812438964844, "logps_train/policy_2_w": -160.24856567382812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.9472842216491699, "rewards_train/1-l": -4.161888599395752, "rewards_train/1-w": 3.481511354446411, "rewards_train/2-2": 2.6188125610351562, "rewards_train/2-w": 1.5720185041427612, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.643399953842163, "rewards_train/margins_1": 2.534227132797241, "rewards_train/margins_2": 1.046794056892395, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -164.1769256591797, "logps_train/policy_1_l": -101.44544982910156, "logps_train/policy_1_w": -86.8934097290039, "logps_train/policy_2_2": -129.00189208984375, "logps_train/policy_2_w": -113.32305908203125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.4291820526123047, "rewards_train/1-l": -1.1242330074310303, "rewards_train/1-w": 2.689565658569336, "rewards_train/2-2": 3.050590991973877, "rewards_train/2-w": 1.7380061149597168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.813798666000366, "rewards_train/margins_1": 1.2603836059570312, "rewards_train/margins_2": 1.3125848770141602, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -121.892822265625, "logps_train/policy_1_l": -85.68923950195312, "logps_train/policy_1_w": -49.326507568359375, "logps_train/policy_2_2": -98.1722640991211, "logps_train/policy_2_w": -67.19960021972656, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 1.4866943359375, "rewards_train/1-l": -1.6157996654510498, "rewards_train/1-w": 2.398599147796631, "rewards_train/2-2": 2.540781021118164, "rewards_train/2-w": 1.875352144241333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.014398813247681, "rewards_train/margins_1": 0.9119048118591309, "rewards_train/margins_2": 0.665428876876831, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -153.94757080078125, "logps_train/policy_1_l": -165.802001953125, "logps_train/policy_1_w": -73.10572052001953, "logps_train/policy_2_2": -129.82986450195312, "logps_train/policy_2_w": -99.53695678710938, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.781805396080017, "rewards_train/1-l": -2.109008550643921, "rewards_train/1-w": 2.8539791107177734, "rewards_train/2-2": 2.905684232711792, "rewards_train/2-w": 1.7959139347076416, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.962987661361694, "rewards_train/margins_1": 1.0721737146377563, "rewards_train/margins_2": 1.1097702980041504, "step": 371 }, { "epoch": 1.11, "learning_rate": 2.253421000511721e-06, "loss": 0.4882, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -135.31593322753906, "logps_train/policy_1_l": -136.4502410888672, "logps_train/policy_1_w": -76.9379653930664, "logps_train/policy_2_2": -101.07247924804688, "logps_train/policy_2_w": -108.62239074707031, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.3793444633483887, "rewards_train/1-l": -1.8856499195098877, "rewards_train/1-w": 2.2871603965759277, "rewards_train/2-2": 2.486501455307007, "rewards_train/2-w": 1.3432292938232422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.172810316085815, "rewards_train/margins_1": 0.9078159332275391, "rewards_train/margins_2": 1.1432721614837646, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -256.38885498046875, "logps_train/policy_1_l": -297.113037109375, "logps_train/policy_1_w": -198.68234252929688, "logps_train/policy_2_2": -215.5226287841797, "logps_train/policy_2_w": -239.83450317382812, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -264.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 3.0923633575439453, "rewards_train/1-l": -3.6659889221191406, "rewards_train/1-w": 4.184892177581787, "rewards_train/2-2": 4.785238265991211, "rewards_train/2-w": 2.6993637084960938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.850881099700928, "rewards_train/margins_1": 1.0925288200378418, "rewards_train/margins_2": 2.085874557495117, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -139.7044677734375, "logps_train/policy_1_l": -134.55323791503906, "logps_train/policy_1_w": -106.4853286743164, "logps_train/policy_2_2": -110.49398803710938, "logps_train/policy_2_w": -131.4805145263672, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.1920524835586548, "rewards_train/1-l": -1.6895517110824585, "rewards_train/1-w": 2.9006857872009277, "rewards_train/2-2": 2.34356951713562, "rewards_train/2-w": 2.1558542251586914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.590237498283386, "rewards_train/margins_1": 1.708633303642273, "rewards_train/margins_2": 0.1877152919769287, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -186.42396545410156, "logps_train/policy_1_l": -138.2827606201172, "logps_train/policy_1_w": -98.1166000366211, "logps_train/policy_2_2": -143.3451690673828, "logps_train/policy_2_w": -133.64280700683594, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.6138529777526855, "rewards_train/1-l": -1.5589401721954346, "rewards_train/1-w": 3.4113869667053223, "rewards_train/2-2": 2.691655158996582, "rewards_train/2-w": 1.723219871520996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.970327138900757, "rewards_train/margins_1": 2.7975339889526367, "rewards_train/margins_2": 0.9684352874755859, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -201.64447021484375, "logps_train/policy_1_l": -272.30963134765625, "logps_train/policy_1_w": -175.99923706054688, "logps_train/policy_2_2": -147.607421875, "logps_train/policy_2_w": -227.34007263183594, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.3730530738830566, "rewards_train/1-l": -3.8430733680725098, "rewards_train/1-w": 3.6836700439453125, "rewards_train/2-2": 3.4833993911743164, "rewards_train/2-w": 1.309743046760559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.526743412017822, "rewards_train/margins_1": 2.310616970062256, "rewards_train/margins_2": 2.1736563444137573, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -119.72792053222656, "logps_train/policy_1_l": -116.23548126220703, "logps_train/policy_1_w": -118.6451416015625, "logps_train/policy_2_2": -93.07676696777344, "logps_train/policy_2_w": -161.5861053466797, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 2.0615830421447754, "rewards_train/1-l": -1.5809696912765503, "rewards_train/1-w": 2.4882194995880127, "rewards_train/2-2": 2.9188859462738037, "rewards_train/2-w": 0.5980298519134521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.069189190864563, "rewards_train/margins_1": 0.4266364574432373, "rewards_train/margins_2": 2.3208560943603516, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -185.03494262695312, "logps_train/policy_1_l": -154.71585083007812, "logps_train/policy_1_w": -116.97193908691406, "logps_train/policy_2_2": -149.25636291503906, "logps_train/policy_2_w": -151.98373413085938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.268381118774414, "rewards_train/1-l": -1.1989295482635498, "rewards_train/1-w": 3.393040657043457, "rewards_train/2-2": 3.8681137561798096, "rewards_train/2-w": 2.0438146591186523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.591970205307007, "rewards_train/margins_1": 1.124659538269043, "rewards_train/margins_2": 1.8242990970611572, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -122.29965209960938, "logps_train/policy_1_l": -124.66954803466797, "logps_train/policy_1_w": -107.40249633789062, "logps_train/policy_2_2": -94.15612030029297, "logps_train/policy_2_w": -133.18516540527344, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.5801911354064941, "rewards_train/1-l": -2.257091999053955, "rewards_train/1-w": 3.0071136951446533, "rewards_train/2-2": 2.7214975357055664, "rewards_train/2-w": 1.9658581018447876, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.264205694198608, "rewards_train/margins_1": 1.4269225597381592, "rewards_train/margins_2": 0.7556394338607788, "step": 372 }, { "epoch": 1.12, "logps_train/policy_1_2": -63.314796447753906, "logps_train/policy_1_l": -101.70159912109375, "logps_train/policy_1_w": -111.10079193115234, "logps_train/policy_2_2": -47.27131652832031, "logps_train/policy_2_w": -140.0519561767578, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.4818015098571777, "rewards_train/1-l": -1.5185976028442383, "rewards_train/1-w": 2.6461703777313232, "rewards_train/2-2": 2.1486501693725586, "rewards_train/2-w": 1.172928810119629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1647679805755615, "rewards_train/margins_1": 1.1643688678741455, "rewards_train/margins_2": 0.9757213592529297, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -176.8636474609375, "logps_train/policy_1_l": -165.83668518066406, "logps_train/policy_1_w": -112.9192886352539, "logps_train/policy_2_2": -125.25279998779297, "logps_train/policy_2_w": -158.90142822265625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.755821943283081, "rewards_train/1-l": -2.290407180786133, "rewards_train/1-w": 3.290102005004883, "rewards_train/2-2": 3.9965949058532715, "rewards_train/2-w": 1.9028258323669434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.580509185791016, "rewards_train/margins_1": 1.5342800617218018, "rewards_train/margins_2": 2.093769073486328, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -93.77162170410156, "logps_train/policy_1_l": -69.4581298828125, "logps_train/policy_1_w": -67.49666595458984, "logps_train/policy_2_2": -74.26741790771484, "logps_train/policy_2_w": -86.45740509033203, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.7181497812271118, "rewards_train/1-l": -1.083117127418518, "rewards_train/1-w": 2.6339271068573, "rewards_train/2-2": 2.4888832569122314, "rewards_train/2-w": 2.0101189613342285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.717044234275818, "rewards_train/margins_1": 0.915777325630188, "rewards_train/margins_2": 0.47876429557800293, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -174.39979553222656, "logps_train/policy_1_l": -202.47518920898438, "logps_train/policy_1_w": -103.82914733886719, "logps_train/policy_2_2": -116.58580017089844, "logps_train/policy_2_w": -146.26004028320312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.6139264702796936, "rewards_train/1-l": -2.683065891265869, "rewards_train/1-w": 2.5889596939086914, "rewards_train/2-2": 2.770326614379883, "rewards_train/2-w": 1.008371114730835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.2720255851745605, "rewards_train/margins_1": 1.9750332236289978, "rewards_train/margins_2": 1.7619554996490479, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -168.59103393554688, "logps_train/policy_1_l": -164.43350219726562, "logps_train/policy_1_w": -119.6937484741211, "logps_train/policy_2_2": -130.57470703125, "logps_train/policy_2_w": -155.30264282226562, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.1112093925476074, "rewards_train/1-l": -2.664442539215088, "rewards_train/1-w": 3.7462503910064697, "rewards_train/2-2": 3.4417479038238525, "rewards_train/2-w": 2.335360527038574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.410692930221558, "rewards_train/margins_1": 1.6350409984588623, "rewards_train/margins_2": 1.1063873767852783, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -187.34396362304688, "logps_train/policy_1_l": -154.06228637695312, "logps_train/policy_1_w": -114.25630187988281, "logps_train/policy_2_2": -141.4681396484375, "logps_train/policy_2_w": -173.977294921875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.682792067527771, "rewards_train/1-l": -1.5505642890930176, "rewards_train/1-w": 3.7868704795837402, "rewards_train/2-2": 3.39068603515625, "rewards_train/2-w": 1.421020746231079, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.337434768676758, "rewards_train/margins_1": 2.1040784120559692, "rewards_train/margins_2": 1.969665288925171, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -126.9761962890625, "logps_train/policy_1_l": -171.7314453125, "logps_train/policy_1_w": -130.69937133789062, "logps_train/policy_2_2": -94.4786148071289, "logps_train/policy_2_w": -186.18966674804688, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.3070683479309082, "rewards_train/1-l": -2.2565431594848633, "rewards_train/1-w": 3.266781806945801, "rewards_train/2-2": 2.4326071739196777, "rewards_train/2-w": 1.324782371520996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.523324966430664, "rewards_train/margins_1": 1.9597134590148926, "rewards_train/margins_2": 1.1078248023986816, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -189.17601013183594, "logps_train/policy_1_l": -157.462890625, "logps_train/policy_1_w": -127.52703857421875, "logps_train/policy_2_2": -149.67135620117188, "logps_train/policy_2_w": -156.34405517578125, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.0788828134536743, "rewards_train/1-l": -2.5128917694091797, "rewards_train/1-w": 3.751202344894409, "rewards_train/2-2": 2.8965864181518555, "rewards_train/2-w": 2.693718910217285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.264094114303589, "rewards_train/margins_1": 2.672319531440735, "rewards_train/margins_2": 0.2028675079345703, "step": 373 }, { "epoch": 1.12, "learning_rate": 2.2288558050064366e-06, "loss": 0.4134, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -90.12318420410156, "logps_train/policy_1_l": -92.83367919921875, "logps_train/policy_1_w": -46.23862838745117, "logps_train/policy_2_2": -61.73561477661133, "logps_train/policy_2_w": -77.72511291503906, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.7318224906921387, "rewards_train/1-l": -2.1462583541870117, "rewards_train/1-w": 2.7980122566223145, "rewards_train/2-2": 2.9389383792877197, "rewards_train/2-w": 1.5142078399658203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.944270610809326, "rewards_train/margins_1": 1.0661897659301758, "rewards_train/margins_2": 1.4247305393218994, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -93.91668701171875, "logps_train/policy_1_l": -187.41934204101562, "logps_train/policy_1_w": -130.75161743164062, "logps_train/policy_2_2": -68.98129272460938, "logps_train/policy_2_w": -181.54653930664062, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5333311557769775, "rewards_train/1-l": -2.641934871673584, "rewards_train/1-w": 4.056089401245117, "rewards_train/2-2": 2.166714906692505, "rewards_train/2-w": 2.0140953063964844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.698024272918701, "rewards_train/margins_1": 2.5227582454681396, "rewards_train/margins_2": 0.1526196002960205, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -153.4854736328125, "logps_train/policy_1_l": -136.17947387695312, "logps_train/policy_1_w": -100.35039520263672, "logps_train/policy_2_2": -109.39005279541016, "logps_train/policy_2_w": -131.43856811523438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 0.9178589582443237, "rewards_train/1-l": -2.0233190059661865, "rewards_train/1-w": 2.7372260093688965, "rewards_train/2-2": 2.61177659034729, "rewards_train/2-w": 1.5229394435882568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.760545015335083, "rewards_train/margins_1": 1.8193670511245728, "rewards_train/margins_2": 1.0888371467590332, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -182.15579223632812, "logps_train/policy_1_l": -121.98847961425781, "logps_train/policy_1_w": -108.14241790771484, "logps_train/policy_2_2": -139.78945922851562, "logps_train/policy_2_w": -149.90155029296875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.8281705379486084, "rewards_train/1-l": -1.2601760625839233, "rewards_train/1-w": 3.8623206615448, "rewards_train/2-2": 3.492929697036743, "rewards_train/2-w": 2.0442209243774414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.122496724128723, "rewards_train/margins_1": 2.0341501235961914, "rewards_train/margins_2": 1.4487087726593018, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -147.06996154785156, "logps_train/policy_1_l": -154.05673217773438, "logps_train/policy_1_w": -170.36297607421875, "logps_train/policy_2_2": -109.91276550292969, "logps_train/policy_2_w": -208.40151977539062, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.0719101428985596, "rewards_train/1-l": -1.9994226694107056, "rewards_train/1-w": 3.9937801361083984, "rewards_train/2-2": 3.324347972869873, "rewards_train/2-w": 2.132503032684326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.993202805519104, "rewards_train/margins_1": 1.9218699932098389, "rewards_train/margins_2": 1.1918449401855469, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -159.95480346679688, "logps_train/policy_1_l": -173.233154296875, "logps_train/policy_1_w": -125.96851348876953, "logps_train/policy_2_2": -127.67826080322266, "logps_train/policy_2_w": -156.5980987548828, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.8513941764831543, "rewards_train/1-l": -2.9254159927368164, "rewards_train/1-w": 3.412132740020752, "rewards_train/2-2": 3.323580503463745, "rewards_train/2-w": 2.208940267562866, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.337548732757568, "rewards_train/margins_1": 1.5607385635375977, "rewards_train/margins_2": 1.114640235900879, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -228.39154052734375, "logps_train/policy_1_l": -260.13897705078125, "logps_train/policy_1_w": -157.64830017089844, "logps_train/policy_2_2": -160.82080078125, "logps_train/policy_2_w": -218.89126586914062, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.9881895780563354, "rewards_train/1-l": -2.8006162643432617, "rewards_train/1-w": 3.7351694107055664, "rewards_train/2-2": 5.031201362609863, "rewards_train/2-w": 1.34212327003479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.535785675048828, "rewards_train/margins_1": 1.746979832649231, "rewards_train/margins_2": 3.6890780925750732, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -205.7229461669922, "logps_train/policy_1_l": -229.21839904785156, "logps_train/policy_1_w": -157.75399780273438, "logps_train/policy_2_2": -161.9800262451172, "logps_train/policy_2_w": -206.47506713867188, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.530830979347229, "rewards_train/1-l": -2.6790666580200195, "rewards_train/1-w": 3.994718313217163, "rewards_train/2-2": 3.2965290546417236, "rewards_train/2-w": 2.4384312629699707, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.673784971237183, "rewards_train/margins_1": 2.463887333869934, "rewards_train/margins_2": 0.8580977916717529, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -138.54214477539062, "logps_train/policy_1_l": -199.0210723876953, "logps_train/policy_1_w": -122.79649353027344, "logps_train/policy_2_2": -103.88801574707031, "logps_train/policy_2_w": -171.98458862304688, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8555526733398438, "rewards_train/1-l": -2.0809152126312256, "rewards_train/1-w": 3.9902727603912354, "rewards_train/2-2": 2.980339288711548, "rewards_train/2-w": 1.9218542575836182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.071187973022461, "rewards_train/margins_1": 2.1347200870513916, "rewards_train/margins_2": 1.0584850311279297, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -145.82992553710938, "logps_train/policy_1_l": -163.28639221191406, "logps_train/policy_1_w": -136.88099670410156, "logps_train/policy_2_2": -105.98513793945312, "logps_train/policy_2_w": -164.6053009033203, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.6685702800750732, "rewards_train/1-l": -1.6075453758239746, "rewards_train/1-w": 2.817368984222412, "rewards_train/2-2": 2.888986587524414, "rewards_train/2-w": 1.848844289779663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.424914360046387, "rewards_train/margins_1": 1.1487987041473389, "rewards_train/margins_2": 1.040142297744751, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -138.4332275390625, "logps_train/policy_1_l": -182.69906616210938, "logps_train/policy_1_w": -148.6295166015625, "logps_train/policy_2_2": -99.37724304199219, "logps_train/policy_2_w": -189.37039184570312, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.3496696949005127, "rewards_train/1-l": -2.131209373474121, "rewards_train/1-w": 1.602674126625061, "rewards_train/2-2": 2.533857822418213, "rewards_train/2-w": 0.016086071729660034, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.733883500099182, "rewards_train/margins_1": 0.25300443172454834, "rewards_train/margins_2": 2.517771750688553, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -157.1669921875, "logps_train/policy_1_l": -89.595458984375, "logps_train/policy_1_w": -106.48821258544922, "logps_train/policy_2_2": -130.3950653076172, "logps_train/policy_2_w": -125.8875732421875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.9489257335662842, "rewards_train/1-l": -1.212670922279358, "rewards_train/1-w": 2.8003973960876465, "rewards_train/2-2": 3.494868755340576, "rewards_train/2-w": 2.08780574798584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.013068318367004, "rewards_train/margins_1": 0.8514716625213623, "rewards_train/margins_2": 1.4070630073547363, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -240.07347106933594, "logps_train/policy_1_l": -202.91668701171875, "logps_train/policy_1_w": -120.75192260742188, "logps_train/policy_2_2": -194.62042236328125, "logps_train/policy_2_w": -147.39141845703125, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.016871213912964, "rewards_train/1-l": -1.0955758094787598, "rewards_train/1-w": 3.3459014892578125, "rewards_train/2-2": 4.178581237792969, "rewards_train/2-w": 2.428046226501465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.441477298736572, "rewards_train/margins_1": 1.3290302753448486, "rewards_train/margins_2": 1.750535011291504, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -127.03253173828125, "logps_train/policy_1_l": -203.58404541015625, "logps_train/policy_1_w": -83.13175964355469, "logps_train/policy_2_2": -109.40634155273438, "logps_train/policy_2_w": -102.30448913574219, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.4865902662277222, "rewards_train/1-l": -3.9505927562713623, "rewards_train/1-w": 3.3376059532165527, "rewards_train/2-2": 2.168741226196289, "rewards_train/2-w": 2.241426467895508, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.288198709487915, "rewards_train/margins_1": 1.8510156869888306, "rewards_train/margins_2": -0.07268524169921875, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -118.33073425292969, "logps_train/policy_1_l": -196.51376342773438, "logps_train/policy_1_w": -81.91973876953125, "logps_train/policy_2_2": -91.80841827392578, "logps_train/policy_2_w": -105.43467712402344, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.8341132402420044, "rewards_train/1-l": -3.032968282699585, "rewards_train/1-w": 2.794745445251465, "rewards_train/2-2": 2.906658411026001, "rewards_train/2-w": 2.270594835281372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.82771372795105, "rewards_train/margins_1": 0.9606322050094604, "rewards_train/margins_2": 0.6360635757446289, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -206.46267700195312, "logps_train/policy_1_l": -198.48768615722656, "logps_train/policy_1_w": -132.70181274414062, "logps_train/policy_2_2": -161.66184997558594, "logps_train/policy_2_w": -174.44114685058594, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 2.500607967376709, "rewards_train/1-l": -2.0410537719726562, "rewards_train/1-w": 3.5770840644836426, "rewards_train/2-2": 4.2064714431762695, "rewards_train/2-w": 2.6662368774414062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.618137836456299, "rewards_train/margins_1": 1.0764760971069336, "rewards_train/margins_2": 1.5402345657348633, "step": 375 }, { "epoch": 1.13, "learning_rate": 2.204317072696586e-06, "loss": 0.4966, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -109.41653442382812, "logps_train/policy_1_l": -227.42735290527344, "logps_train/policy_1_w": -156.20095825195312, "logps_train/policy_2_2": -91.74632263183594, "logps_train/policy_2_w": -191.15492248535156, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.7474088668823242, "rewards_train/1-l": -3.2755489349365234, "rewards_train/1-w": 4.099434852600098, "rewards_train/2-2": 2.281226873397827, "rewards_train/2-w": 2.7946643829345703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.374983787536621, "rewards_train/margins_1": 2.3520259857177734, "rewards_train/margins_2": -0.5134375095367432, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -62.97079086303711, "logps_train/policy_1_l": -95.93810272216797, "logps_train/policy_1_w": -82.17373657226562, "logps_train/policy_2_2": -50.675010681152344, "logps_train/policy_2_w": -114.24594116210938, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.2904211282730103, "rewards_train/1-l": -1.5992794036865234, "rewards_train/1-w": 2.412313461303711, "rewards_train/2-2": 1.7263466119766235, "rewards_train/2-w": 1.2754056453704834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.011592864990234, "rewards_train/margins_1": 1.1218923330307007, "rewards_train/margins_2": 0.45094096660614014, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -149.60794067382812, "logps_train/policy_1_l": -128.28012084960938, "logps_train/policy_1_w": -82.92828369140625, "logps_train/policy_2_2": -115.77084350585938, "logps_train/policy_2_w": -106.10353088378906, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.508736491203308, "rewards_train/1-l": -1.966684341430664, "rewards_train/1-w": 2.819672107696533, "rewards_train/2-2": 2.9729151725769043, "rewards_train/2-w": 1.9537103176116943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.786356449127197, "rewards_train/margins_1": 1.310935616493225, "rewards_train/margins_2": 1.01920485496521, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -113.9916763305664, "logps_train/policy_1_l": -135.51901245117188, "logps_train/policy_1_w": -93.28268432617188, "logps_train/policy_2_2": -87.56147766113281, "logps_train/policy_2_w": -115.84449768066406, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.0086452960968018, "rewards_train/1-l": -1.9347134828567505, "rewards_train/1-w": 2.8234899044036865, "rewards_train/2-2": 2.100101947784424, "rewards_train/2-w": 1.8290272951126099, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.758203387260437, "rewards_train/margins_1": 1.8148446083068848, "rewards_train/margins_2": 0.27107465267181396, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -162.86404418945312, "logps_train/policy_1_l": -162.5260467529297, "logps_train/policy_1_w": -108.26583862304688, "logps_train/policy_2_2": -131.45689392089844, "logps_train/policy_2_w": -142.2064666748047, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.4214074611663818, "rewards_train/1-l": -2.0140304565429688, "rewards_train/1-w": 3.1015408039093018, "rewards_train/2-2": 2.8574352264404297, "rewards_train/2-w": 1.955134391784668, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.1155712604522705, "rewards_train/margins_1": 1.68013334274292, "rewards_train/margins_2": 0.9023008346557617, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -171.96417236328125, "logps_train/policy_1_l": -195.0443115234375, "logps_train/policy_1_w": -146.7847137451172, "logps_train/policy_2_2": -139.08273315429688, "logps_train/policy_2_w": -179.99600219726562, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 2.846942901611328, "rewards_train/1-l": -1.6224005222320557, "rewards_train/1-w": 3.6871538162231445, "rewards_train/2-2": 4.230397701263428, "rewards_train/2-w": 2.28008770942688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.3095543384552, "rewards_train/margins_1": 0.8402109146118164, "rewards_train/margins_2": 1.9503099918365479, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -214.35665893554688, "logps_train/policy_1_l": -231.694580078125, "logps_train/policy_1_w": -107.31997680664062, "logps_train/policy_2_2": -166.46932983398438, "logps_train/policy_2_w": -151.6650390625, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.2518351078033447, "rewards_train/1-l": -3.513207197189331, "rewards_train/1-w": 3.2523770332336426, "rewards_train/2-2": 3.4874415397644043, "rewards_train/2-w": 2.049121141433716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.765584230422974, "rewards_train/margins_1": 2.000541925430298, "rewards_train/margins_2": 1.4383203983306885, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -52.71586227416992, "logps_train/policy_1_l": -119.93482208251953, "logps_train/policy_1_w": -82.97979736328125, "logps_train/policy_2_2": -42.13090515136719, "logps_train/policy_2_w": -100.3438720703125, "logps_train/ref_1_2": -69.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -59.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 1.6291948556900024, "rewards_train/1-l": -1.6893808841705322, "rewards_train/1-w": 1.994598388671875, "rewards_train/2-2": 1.7613235712051392, "rewards_train/2-w": 1.6259644031524658, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.6839792728424072, "rewards_train/margins_1": 0.36540353298187256, "rewards_train/margins_2": 0.13535916805267334, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -177.41378784179688, "logps_train/policy_1_l": -166.29403686523438, "logps_train/policy_1_w": -97.08177185058594, "logps_train/policy_2_2": -153.62088012695312, "logps_train/policy_2_w": -121.01605224609375, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 2.658620595932007, "rewards_train/1-l": -1.6403403282165527, "rewards_train/1-w": 3.133228302001953, "rewards_train/2-2": 3.7402544021606445, "rewards_train/2-w": 2.3765196800231934, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.773568630218506, "rewards_train/margins_1": 0.4746077060699463, "rewards_train/margins_2": 1.3637347221374512, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -136.83663940429688, "logps_train/policy_1_l": -151.51710510253906, "logps_train/policy_1_w": -109.73319244384766, "logps_train/policy_2_2": -94.31080627441406, "logps_train/policy_2_w": -150.26058959960938, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.9343048334121704, "rewards_train/1-l": -1.5377455949783325, "rewards_train/1-w": 3.195040225982666, "rewards_train/2-2": 2.3697004318237305, "rewards_train/2-w": 1.8458163738250732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7327858209609985, "rewards_train/margins_1": 2.2607353925704956, "rewards_train/margins_2": 0.5238840579986572, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -84.47956848144531, "logps_train/policy_1_l": -100.49143981933594, "logps_train/policy_1_w": -58.62245178222656, "logps_train/policy_2_2": -58.471641540527344, "logps_train/policy_2_w": -86.40013885498047, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.6832931041717529, "rewards_train/1-l": -1.625511646270752, "rewards_train/1-w": 2.2854113578796387, "rewards_train/2-2": 1.6618201732635498, "rewards_train/2-w": 1.0002206563949585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9109230041503906, "rewards_train/margins_1": 1.6021182537078857, "rewards_train/margins_2": 0.6615995168685913, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -203.42013549804688, "logps_train/policy_1_l": -216.86167907714844, "logps_train/policy_1_w": -178.39471435546875, "logps_train/policy_2_2": -144.93582153320312, "logps_train/policy_2_w": -233.9488525390625, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.7001748085021973, "rewards_train/1-l": -2.1743884086608887, "rewards_train/1-w": 6.721466064453125, "rewards_train/2-2": 4.112668037414551, "rewards_train/2-w": 4.166053771972656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 8.895854473114014, "rewards_train/margins_1": 5.021291255950928, "rewards_train/margins_2": -0.05338573455810547, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -149.59747314453125, "logps_train/policy_1_l": -172.68775939941406, "logps_train/policy_1_w": -128.4625701904297, "logps_train/policy_2_2": -104.75712585449219, "logps_train/policy_2_w": -169.322998046875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 2.6363465785980225, "rewards_train/1-l": -2.0070571899414062, "rewards_train/1-w": 3.138117551803589, "rewards_train/2-2": 4.258662700653076, "rewards_train/2-w": 1.684887409210205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.145174741744995, "rewards_train/margins_1": 0.5017709732055664, "rewards_train/margins_2": 2.573775291442871, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -134.0411834716797, "logps_train/policy_1_l": -128.73297119140625, "logps_train/policy_1_w": -93.41915893554688, "logps_train/policy_2_2": -108.80146789550781, "logps_train/policy_2_w": -130.53927612304688, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 2.0833818912506104, "rewards_train/1-l": -2.054546356201172, "rewards_train/1-w": 3.383084297180176, "rewards_train/2-2": 2.7268848419189453, "rewards_train/2-w": 2.1366965770721436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.437630653381348, "rewards_train/margins_1": 1.2997024059295654, "rewards_train/margins_2": 0.5901882648468018, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -186.30691528320312, "logps_train/policy_1_l": -241.20941162109375, "logps_train/policy_1_w": -220.4506378173828, "logps_train/policy_2_2": -153.53562927246094, "logps_train/policy_2_w": -271.69549560546875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -227.0, "logps_train/ref_1_w": -270.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 2.2497777938842773, "rewards_train/1-l": -1.4334403276443481, "rewards_train/1-w": 5.039310932159424, "rewards_train/2-2": 3.375342845916748, "rewards_train/2-w": 3.055450439453125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 6.472751259803772, "rewards_train/margins_1": 2.7895331382751465, "rewards_train/margins_2": 0.31989240646362305, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -123.95582580566406, "logps_train/policy_1_l": -140.94305419921875, "logps_train/policy_1_w": -116.60696411132812, "logps_train/policy_2_2": -100.01290893554688, "logps_train/policy_2_w": -160.44911193847656, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 2.351292848587036, "rewards_train/1-l": -1.4005539417266846, "rewards_train/1-w": 2.798678398132324, "rewards_train/2-2": 3.2830843925476074, "rewards_train/2-w": 0.8113393783569336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.199232339859009, "rewards_train/margins_1": 0.4473855495452881, "rewards_train/margins_2": 2.471745014190674, "step": 377 }, { "epoch": 1.13, "learning_rate": 2.1798071985191833e-06, "loss": 0.5961, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -134.7891082763672, "logps_train/policy_1_l": -106.87606811523438, "logps_train/policy_1_w": -85.63552856445312, "logps_train/policy_2_2": -97.69245147705078, "logps_train/policy_2_w": -117.39513397216797, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.2367143630981445, "rewards_train/1-l": -2.1016693115234375, "rewards_train/1-w": 3.372385025024414, "rewards_train/2-2": 3.002629518508911, "rewards_train/2-w": 1.7057991027832031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.474054336547852, "rewards_train/margins_1": 2.1356706619262695, "rewards_train/margins_2": 1.296830415725708, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -175.62872314453125, "logps_train/policy_1_l": -221.02377319335938, "logps_train/policy_1_w": -157.40269470214844, "logps_train/policy_2_2": -143.05560302734375, "logps_train/policy_2_w": -209.8641357421875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.4488468170166016, "rewards_train/1-l": -2.142221212387085, "rewards_train/1-w": 4.345668315887451, "rewards_train/2-2": 3.531157970428467, "rewards_train/2-w": 2.1385884284973145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.487889528274536, "rewards_train/margins_1": 1.8968214988708496, "rewards_train/margins_2": 1.3925695419311523, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -104.90601348876953, "logps_train/policy_1_l": -167.21884155273438, "logps_train/policy_1_w": -57.992679595947266, "logps_train/policy_2_2": -88.13760375976562, "logps_train/policy_2_w": -66.84725189208984, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.524828314781189, "rewards_train/1-l": -2.5233492851257324, "rewards_train/1-w": 2.15053653717041, "rewards_train/2-2": 2.0655360221862793, "rewards_train/2-w": 1.7785556316375732, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.673885822296143, "rewards_train/margins_1": 0.6257082223892212, "rewards_train/margins_2": 0.28698039054870605, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -98.3160400390625, "logps_train/policy_1_l": -127.58128356933594, "logps_train/policy_1_w": -57.29761505126953, "logps_train/policy_2_2": -80.35617065429688, "logps_train/policy_2_w": -77.45236206054688, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 1.755994200706482, "rewards_train/1-l": -1.7337145805358887, "rewards_train/1-w": 2.666332721710205, "rewards_train/2-2": 2.1951444149017334, "rewards_train/2-w": 1.8055448532104492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.400047302246094, "rewards_train/margins_1": 0.9103385210037231, "rewards_train/margins_2": 0.3895995616912842, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -117.88459777832031, "logps_train/policy_1_l": -101.79678344726562, "logps_train/policy_1_w": -119.94458770751953, "logps_train/policy_2_2": -88.45037078857422, "logps_train/policy_2_w": -161.13330078125, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.9435716271400452, "rewards_train/1-l": -1.3445227146148682, "rewards_train/1-w": 3.6742916107177734, "rewards_train/2-2": 2.078498601913452, "rewards_train/2-w": 1.899169921875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 5.018814325332642, "rewards_train/margins_1": 2.7307199835777283, "rewards_train/margins_2": 0.17932868003845215, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -69.80744171142578, "logps_train/policy_1_l": -102.61358642578125, "logps_train/policy_1_w": -64.10188293457031, "logps_train/policy_2_2": -50.95502471923828, "logps_train/policy_2_w": -85.01670837402344, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -88.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.394256353378296, "rewards_train/1-l": -1.4361629486083984, "rewards_train/1-w": 2.0175459384918213, "rewards_train/2-2": 1.8138723373413086, "rewards_train/2-w": 0.9795793294906616, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4537088871002197, "rewards_train/margins_1": 0.6232895851135254, "rewards_train/margins_2": 0.834293007850647, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -127.679931640625, "logps_train/policy_1_l": -130.7165985107422, "logps_train/policy_1_w": -109.89851379394531, "logps_train/policy_2_2": -88.10459899902344, "logps_train/policy_2_w": -158.2172393798828, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.6882565021514893, "rewards_train/1-l": -1.6273239850997925, "rewards_train/1-w": 2.8523361682891846, "rewards_train/2-2": 2.931727409362793, "rewards_train/2-w": 1.317728877067566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.479660153388977, "rewards_train/margins_1": 1.1640796661376953, "rewards_train/margins_2": 1.613998532295227, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -46.110023498535156, "logps_train/policy_1_l": -113.12374114990234, "logps_train/policy_1_w": -60.51066970825195, "logps_train/policy_2_2": -38.6711540222168, "logps_train/policy_2_w": -67.6705322265625, "logps_train/ref_1_2": -57.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -53.5, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 1.1530601978302002, "rewards_train/1-l": -2.1785855293273926, "rewards_train/1-w": 2.248152017593384, "rewards_train/2-2": 1.4664784669876099, "rewards_train/2-w": 1.926697015762329, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.426737546920776, "rewards_train/margins_1": 1.0950918197631836, "rewards_train/margins_2": -0.46021854877471924, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -116.22074890136719, "logps_train/policy_1_l": -176.10809326171875, "logps_train/policy_1_w": -113.42115783691406, "logps_train/policy_2_2": -94.74766540527344, "logps_train/policy_2_w": -144.94497680664062, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.6451127529144287, "rewards_train/1-l": -2.196747303009033, "rewards_train/1-w": 3.761009454727173, "rewards_train/2-2": 2.4424209594726562, "rewards_train/2-w": 2.5430026054382324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.957756757736206, "rewards_train/margins_1": 2.115896701812744, "rewards_train/margins_2": -0.10058164596557617, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -106.5905990600586, "logps_train/policy_1_l": -186.69183349609375, "logps_train/policy_1_w": -128.660888671875, "logps_train/policy_2_2": -79.39492797851562, "logps_train/policy_2_w": -188.1545867919922, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.5753148794174194, "rewards_train/1-l": -1.8493722677230835, "rewards_train/1-w": 3.2374260425567627, "rewards_train/2-2": 2.221444606781006, "rewards_train/2-w": 0.9548542499542236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.086798310279846, "rewards_train/margins_1": 1.6621111631393433, "rewards_train/margins_2": 1.2665903568267822, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -121.53058624267578, "logps_train/policy_1_l": -189.2222442626953, "logps_train/policy_1_w": -138.19711303710938, "logps_train/policy_2_2": -101.59263610839844, "logps_train/policy_2_w": -174.07159423828125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.9007009267807007, "rewards_train/1-l": -1.8745689392089844, "rewards_train/1-w": 2.5756020545959473, "rewards_train/2-2": 2.538294553756714, "rewards_train/2-w": 1.1287773847579956, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.450170993804932, "rewards_train/margins_1": 0.6749011278152466, "rewards_train/margins_2": 1.4095171689987183, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -142.10275268554688, "logps_train/policy_1_l": -160.0838165283203, "logps_train/policy_1_w": -65.40789794921875, "logps_train/policy_2_2": -97.86607360839844, "logps_train/policy_2_w": -101.89493560791016, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.1690211296081543, "rewards_train/1-l": -1.5994950532913208, "rewards_train/1-w": 2.323272705078125, "rewards_train/2-2": 3.0595836639404297, "rewards_train/2-w": 1.4667563438415527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.922767758369446, "rewards_train/margins_1": 1.1542515754699707, "rewards_train/margins_2": 1.592827320098877, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -88.87245178222656, "logps_train/policy_1_l": -113.81486511230469, "logps_train/policy_1_w": -85.251220703125, "logps_train/policy_2_2": -68.13381958007812, "logps_train/policy_2_w": -113.0013198852539, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.1869739294052124, "rewards_train/1-l": -1.4967198371887207, "rewards_train/1-w": 3.4306397438049316, "rewards_train/2-2": 1.9545869827270508, "rewards_train/2-w": 2.4236958026885986, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.927359580993652, "rewards_train/margins_1": 2.2436658143997192, "rewards_train/margins_2": -0.46910881996154785, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -70.43484497070312, "logps_train/policy_1_l": -136.6313934326172, "logps_train/policy_1_w": -80.14633178710938, "logps_train/policy_2_2": -48.545902252197266, "logps_train/policy_2_w": -106.57698822021484, "logps_train/ref_1_2": -82.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -66.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.1924524307250977, "rewards_train/1-l": -2.2561087608337402, "rewards_train/1-w": 2.2744290828704834, "rewards_train/2-2": 1.7665035724639893, "rewards_train/2-w": 1.4282386302947998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.530537843704224, "rewards_train/margins_1": 1.0819766521453857, "rewards_train/margins_2": 0.33826494216918945, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -260.63531494140625, "logps_train/policy_1_l": -185.84027099609375, "logps_train/policy_1_w": -115.8450927734375, "logps_train/policy_2_2": -215.03778076171875, "logps_train/policy_2_w": -165.1334228515625, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.9231860637664795, "rewards_train/1-l": -1.5703544616699219, "rewards_train/1-w": 4.09049129486084, "rewards_train/2-2": 4.022003650665283, "rewards_train/2-w": 2.30228328704834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.660845756530762, "rewards_train/margins_1": 2.1673052310943604, "rewards_train/margins_2": 1.7197203636169434, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -65.72982788085938, "logps_train/policy_1_l": -73.41722106933594, "logps_train/policy_1_w": -66.71113586425781, "logps_train/policy_2_2": -45.839019775390625, "logps_train/policy_2_w": -91.06449890136719, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.6067047119140625, "rewards_train/1-l": -1.4856677055358887, "rewards_train/1-w": 2.197636127471924, "rewards_train/2-2": 2.2465672492980957, "rewards_train/2-w": 1.2763621807098389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6833038330078125, "rewards_train/margins_1": 0.5909314155578613, "rewards_train/margins_2": 0.9702050685882568, "step": 379 }, { "epoch": 1.14, "learning_rate": 2.1553285745947396e-06, "loss": 0.4553, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -112.46293640136719, "logps_train/policy_1_l": -133.0692901611328, "logps_train/policy_1_w": -158.2436065673828, "logps_train/policy_2_2": -79.24864959716797, "logps_train/policy_2_w": -202.47225952148438, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.994330883026123, "rewards_train/1-l": -1.1737258434295654, "rewards_train/1-w": 2.9654834270477295, "rewards_train/2-2": 3.0255253314971924, "rewards_train/2-w": 1.1144938468933105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.139209270477295, "rewards_train/margins_1": 0.9711525440216064, "rewards_train/margins_2": 1.9110314846038818, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -123.91932678222656, "logps_train/policy_1_l": -159.19369506835938, "logps_train/policy_1_w": -94.86775207519531, "logps_train/policy_2_2": -106.36389923095703, "logps_train/policy_2_w": -112.036865234375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.7440046072006226, "rewards_train/1-l": -1.833822250366211, "rewards_train/1-w": 2.215275764465332, "rewards_train/2-2": 2.4528679847717285, "rewards_train/2-w": 1.7111575603485107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.049098014831543, "rewards_train/margins_1": 0.4712711572647095, "rewards_train/margins_2": 0.7417104244232178, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -131.83938598632812, "logps_train/policy_1_l": -172.32838439941406, "logps_train/policy_1_w": -100.54676818847656, "logps_train/policy_2_2": -96.57195281982422, "logps_train/policy_2_w": -127.44951629638672, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.088327407836914, "rewards_train/1-l": -3.110182523727417, "rewards_train/1-w": 3.100792407989502, "rewards_train/2-2": 1.9178051948547363, "rewards_train/2-w": 2.0605170726776123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.210974931716919, "rewards_train/margins_1": 2.012465000152588, "rewards_train/margins_2": -0.14271187782287598, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -134.3331298828125, "logps_train/policy_1_l": -83.27233123779297, "logps_train/policy_1_w": -140.30181884765625, "logps_train/policy_2_2": -115.87948608398438, "logps_train/policy_2_w": -178.16140747070312, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.897156000137329, "rewards_train/1-l": -0.673229455947876, "rewards_train/1-w": 4.163568019866943, "rewards_train/2-2": 2.6222076416015625, "rewards_train/2-w": 2.5119855403900146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.836797475814819, "rewards_train/margins_1": 2.2664120197296143, "rewards_train/margins_2": 0.11022210121154785, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -104.89242553710938, "logps_train/policy_1_l": -62.40532302856445, "logps_train/policy_1_w": -82.33914947509766, "logps_train/policy_2_2": -80.65950012207031, "logps_train/policy_2_w": -124.91857147216797, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.6469147801399231, "rewards_train/1-l": -0.5430711507797241, "rewards_train/1-w": 2.7578816413879395, "rewards_train/2-2": 1.868571400642395, "rewards_train/2-w": 1.0929090976715088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3009527921676636, "rewards_train/margins_1": 2.1109668612480164, "rewards_train/margins_2": 0.7756623029708862, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -92.7323226928711, "logps_train/policy_1_l": -101.90324401855469, "logps_train/policy_1_w": -100.025634765625, "logps_train/policy_2_2": -70.79942321777344, "logps_train/policy_2_w": -131.12191772460938, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.5283303260803223, "rewards_train/1-l": -0.9258710741996765, "rewards_train/1-w": 2.7376227378845215, "rewards_train/2-2": 2.1989634037017822, "rewards_train/2-w": 1.4895670413970947, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.663493812084198, "rewards_train/margins_1": 1.2092924118041992, "rewards_train/margins_2": 0.7093963623046875, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -221.73532104492188, "logps_train/policy_1_l": -181.6026153564453, "logps_train/policy_1_w": -101.29443359375, "logps_train/policy_2_2": -167.2208251953125, "logps_train/policy_2_w": -133.3917694091797, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2944376468658447, "rewards_train/1-l": -1.6055747270584106, "rewards_train/1-w": 2.8627445697784424, "rewards_train/2-2": 3.6388540267944336, "rewards_train/2-w": 1.7092607021331787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.468319296836853, "rewards_train/margins_1": 1.5683069229125977, "rewards_train/margins_2": 1.9295933246612549, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -175.288818359375, "logps_train/policy_1_l": -175.7515869140625, "logps_train/policy_1_w": -176.01922607421875, "logps_train/policy_2_2": -129.55474853515625, "logps_train/policy_2_w": -245.37747192382812, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 2.6992430686950684, "rewards_train/1-l": -2.4696903228759766, "rewards_train/1-w": 4.604327201843262, "rewards_train/2-2": 4.161712646484375, "rewards_train/2-w": 1.7466286420822144, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.074017524719238, "rewards_train/margins_1": 1.9050841331481934, "rewards_train/margins_2": 2.4150840044021606, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -140.3607177734375, "logps_train/policy_1_l": -118.63574981689453, "logps_train/policy_1_w": -83.99826049804688, "logps_train/policy_2_2": -100.93647766113281, "logps_train/policy_2_w": -116.0106430053711, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.479553461074829, "rewards_train/1-l": -1.5528329610824585, "rewards_train/1-w": 3.1820101737976074, "rewards_train/2-2": 2.6188526153564453, "rewards_train/2-w": 1.9387791156768799, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.734843134880066, "rewards_train/margins_1": 1.7024567127227783, "rewards_train/margins_2": 0.6800734996795654, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -181.3362274169922, "logps_train/policy_1_l": -207.82708740234375, "logps_train/policy_1_w": -156.51272583007812, "logps_train/policy_2_2": -148.87667846679688, "logps_train/policy_2_w": -214.8714141845703, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 2.2202839851379395, "rewards_train/1-l": -1.9358339309692383, "rewards_train/1-w": 4.347163677215576, "rewards_train/2-2": 3.2881126403808594, "rewards_train/2-w": 2.161297082901001, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.2829976081848145, "rewards_train/margins_1": 2.1268796920776367, "rewards_train/margins_2": 1.1268155574798584, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -193.8231201171875, "logps_train/policy_1_l": -105.86146545410156, "logps_train/policy_1_w": -82.44432067871094, "logps_train/policy_2_2": -168.06988525390625, "logps_train/policy_2_w": -110.77203369140625, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.5903441905975342, "rewards_train/1-l": -1.4272112846374512, "rewards_train/1-w": 2.7469735145568848, "rewards_train/2-2": 2.9055113792419434, "rewards_train/2-w": 2.038421392440796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.174184799194336, "rewards_train/margins_1": 1.1566293239593506, "rewards_train/margins_2": 0.8670899868011475, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -215.01754760742188, "logps_train/policy_1_l": -226.90118408203125, "logps_train/policy_1_w": -126.9473876953125, "logps_train/policy_2_2": -174.20872497558594, "logps_train/policy_2_w": -171.55169677734375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.0533242225646973, "rewards_train/1-l": -1.8153139352798462, "rewards_train/1-w": 3.6111207008361816, "rewards_train/2-2": 3.530885934829712, "rewards_train/2-w": 2.4323315620422363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.426434636116028, "rewards_train/margins_1": 1.5577964782714844, "rewards_train/margins_2": 1.0985543727874756, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -78.13034057617188, "logps_train/policy_1_l": -59.24469757080078, "logps_train/policy_1_w": -41.41001892089844, "logps_train/policy_2_2": -59.658782958984375, "logps_train/policy_2_w": -69.2225341796875, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -48.5, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.7391149997711182, "rewards_train/1-l": -1.0613834857940674, "rewards_train/1-w": 2.275209426879883, "rewards_train/2-2": 1.4247463941574097, "rewards_train/2-w": 1.4066530466079712, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.33659291267395, "rewards_train/margins_1": 1.5360944271087646, "rewards_train/margins_2": 0.018093347549438477, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -218.36329650878906, "logps_train/policy_1_l": -215.796630859375, "logps_train/policy_1_w": -173.32972717285156, "logps_train/policy_2_2": -170.82672119140625, "logps_train/policy_2_w": -212.08724975585938, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 2.9042959213256836, "rewards_train/1-l": -1.665601134300232, "rewards_train/1-w": 4.55452823638916, "rewards_train/2-2": 4.486077785491943, "rewards_train/2-w": 2.8787758350372314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.220129370689392, "rewards_train/margins_1": 1.6502323150634766, "rewards_train/margins_2": 1.607301950454712, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -85.85635375976562, "logps_train/policy_1_l": -90.88008880615234, "logps_train/policy_1_w": -71.91693115234375, "logps_train/policy_2_2": -68.37541198730469, "logps_train/policy_2_w": -95.43601989746094, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 1.4159268140792847, "rewards_train/1-l": -1.4051966667175293, "rewards_train/1-w": 2.6264705657958984, "rewards_train/2-2": 2.1390209197998047, "rewards_train/2-w": 1.7087421417236328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.031667232513428, "rewards_train/margins_1": 1.2105437517166138, "rewards_train/margins_2": 0.4302787780761719, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -118.53247833251953, "logps_train/policy_1_l": -189.07762145996094, "logps_train/policy_1_w": -136.49497985839844, "logps_train/policy_2_2": -96.31974792480469, "logps_train/policy_2_w": -166.64134216308594, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.139037609100342, "rewards_train/1-l": -2.0311994552612305, "rewards_train/1-w": 3.0598771572113037, "rewards_train/2-2": 2.929549217224121, "rewards_train/2-w": 2.185866594314575, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.091076612472534, "rewards_train/margins_1": 0.9208395481109619, "rewards_train/margins_2": 0.7436826229095459, "step": 381 }, { "epoch": 1.14, "learning_rate": 2.1308835899937974e-06, "loss": 0.4758, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -85.98455810546875, "logps_train/policy_1_l": -47.33186340332031, "logps_train/policy_1_w": -56.006038665771484, "logps_train/policy_2_2": -60.40332794189453, "logps_train/policy_2_w": -79.40064239501953, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 0.6217647194862366, "rewards_train/1-l": -0.5128675103187561, "rewards_train/1-w": 1.892761468887329, "rewards_train/2-2": 1.6322742700576782, "rewards_train/2-w": 0.8001209497451782, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.405628979206085, "rewards_train/margins_1": 1.2709967494010925, "rewards_train/margins_2": 0.8321533203125, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -217.7125244140625, "logps_train/policy_1_l": -170.74172973632812, "logps_train/policy_1_w": -144.2141571044922, "logps_train/policy_2_2": -168.32655334472656, "logps_train/policy_2_w": -190.01614379882812, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.8662471771240234, "rewards_train/1-l": -1.6241735219955444, "rewards_train/1-w": 3.634834051132202, "rewards_train/2-2": 4.867344379425049, "rewards_train/2-w": 1.9733844995498657, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.259007573127747, "rewards_train/margins_1": 0.7685868740081787, "rewards_train/margins_2": 2.893959879875183, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -210.275390625, "logps_train/policy_1_l": -181.83181762695312, "logps_train/policy_1_w": -126.84373474121094, "logps_train/policy_2_2": -172.1517791748047, "logps_train/policy_2_w": -166.92657470703125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 3.48496150970459, "rewards_train/1-l": -2.0034942626953125, "rewards_train/1-w": 3.9625015258789062, "rewards_train/2-2": 4.409821510314941, "rewards_train/2-w": 2.788593292236328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.965995788574219, "rewards_train/margins_1": 0.4775400161743164, "rewards_train/margins_2": 1.6212282180786133, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -210.68710327148438, "logps_train/policy_1_l": -250.63555908203125, "logps_train/policy_1_w": -160.85287475585938, "logps_train/policy_2_2": -166.73130798339844, "logps_train/policy_2_w": -230.21502685546875, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.014882802963257, "rewards_train/1-l": -2.4037904739379883, "rewards_train/1-w": 3.8990883827209473, "rewards_train/2-2": 3.569838047027588, "rewards_train/2-w": 1.615997552871704, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.3028788566589355, "rewards_train/margins_1": 1.8842055797576904, "rewards_train/margins_2": 1.9538404941558838, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -142.90591430664062, "logps_train/policy_1_l": -143.89813232421875, "logps_train/policy_1_w": -75.93418884277344, "logps_train/policy_2_2": -101.00592803955078, "logps_train/policy_2_w": -101.73690795898438, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.0172218084335327, "rewards_train/1-l": -1.8445007801055908, "rewards_train/1-w": 2.1216683387756348, "rewards_train/2-2": 2.8142507076263428, "rewards_train/2-w": 1.414980411529541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9661691188812256, "rewards_train/margins_1": 1.104446530342102, "rewards_train/margins_2": 1.3992702960968018, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -138.42457580566406, "logps_train/policy_1_l": -185.589111328125, "logps_train/policy_1_w": -58.19140625, "logps_train/policy_2_2": -120.31269836425781, "logps_train/policy_2_w": -78.51192474365234, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.9231668710708618, "rewards_train/1-l": -3.4591073989868164, "rewards_train/1-w": 2.7833011150360107, "rewards_train/2-2": 2.951543092727661, "rewards_train/2-w": 1.9506633281707764, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.242408514022827, "rewards_train/margins_1": 0.8601342439651489, "rewards_train/margins_2": 1.0008797645568848, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -182.97938537597656, "logps_train/policy_1_l": -210.3390655517578, "logps_train/policy_1_w": -138.56298828125, "logps_train/policy_2_2": -135.19664001464844, "logps_train/policy_2_w": -193.86624145507812, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.2239365577697754, "rewards_train/1-l": -1.9049265384674072, "rewards_train/1-w": 3.4331541061401367, "rewards_train/2-2": 3.442836046218872, "rewards_train/2-w": 1.471189260482788, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.338080644607544, "rewards_train/margins_1": 1.2092175483703613, "rewards_train/margins_2": 1.971646785736084, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -134.19544982910156, "logps_train/policy_1_l": -179.61257934570312, "logps_train/policy_1_w": -130.68067932128906, "logps_train/policy_2_2": -106.11918640136719, "logps_train/policy_2_w": -173.30430603027344, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.224205493927002, "rewards_train/1-l": -1.3074491024017334, "rewards_train/1-w": 3.507225275039673, "rewards_train/2-2": 2.963080883026123, "rewards_train/2-w": 2.0578513145446777, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.814674377441406, "rewards_train/margins_1": 1.283019781112671, "rewards_train/margins_2": 0.9052295684814453, "step": 382 }, { "epoch": 1.15, "logps_train/policy_1_2": -190.8948974609375, "logps_train/policy_1_l": -175.6134796142578, "logps_train/policy_1_w": -150.3162841796875, "logps_train/policy_2_2": -144.42880249023438, "logps_train/policy_2_w": -201.15037536621094, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.985508918762207, "rewards_train/1-l": -2.785175085067749, "rewards_train/1-w": 4.074620723724365, "rewards_train/2-2": 3.7258706092834473, "rewards_train/2-w": 1.9568369388580322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.859795808792114, "rewards_train/margins_1": 2.089111804962158, "rewards_train/margins_2": 1.769033670425415, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -197.6309814453125, "logps_train/policy_1_l": -156.85548400878906, "logps_train/policy_1_w": -165.38372802734375, "logps_train/policy_2_2": -156.87506103515625, "logps_train/policy_2_w": -214.54632568359375, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.2244009971618652, "rewards_train/1-l": -1.731348991394043, "rewards_train/1-w": 4.4952216148376465, "rewards_train/2-2": 3.931243658065796, "rewards_train/2-w": 2.5852112770080566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.2265706062316895, "rewards_train/margins_1": 2.2708206176757812, "rewards_train/margins_2": 1.3460323810577393, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -127.06198120117188, "logps_train/policy_1_l": -73.6530532836914, "logps_train/policy_1_w": -43.93424987792969, "logps_train/policy_2_2": -90.31483459472656, "logps_train/policy_2_w": -67.49214935302734, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -62.0, "logps_train/ref_1_w": -63.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 0.5446313619613647, "rewards_train/1-l": -1.1796610355377197, "rewards_train/1-w": 1.8933426141738892, "rewards_train/2-2": 2.3967392444610596, "rewards_train/2-w": 0.6047890186309814, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.073003649711609, "rewards_train/margins_1": 1.3487112522125244, "rewards_train/margins_2": 1.7919502258300781, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -203.33853149414062, "logps_train/policy_1_l": -273.6319580078125, "logps_train/policy_1_w": -148.5419158935547, "logps_train/policy_2_2": -160.84854125976562, "logps_train/policy_2_w": -198.68975830078125, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 3.2122411727905273, "rewards_train/1-l": -2.672569513320923, "rewards_train/1-w": 4.770808219909668, "rewards_train/2-2": 4.339951038360596, "rewards_train/2-w": 3.0435242652893066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.443377733230591, "rewards_train/margins_1": 1.5585670471191406, "rewards_train/margins_2": 1.296426773071289, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -127.27201843261719, "logps_train/policy_1_l": -129.15829467773438, "logps_train/policy_1_w": -86.56193542480469, "logps_train/policy_2_2": -107.15239715576172, "logps_train/policy_2_w": -106.91341400146484, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 1.6610794067382812, "rewards_train/1-l": -1.0058684349060059, "rewards_train/1-w": 2.875056266784668, "rewards_train/2-2": 2.3933544158935547, "rewards_train/2-w": 1.8461582660675049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.880924701690674, "rewards_train/margins_1": 1.2139768600463867, "rewards_train/margins_2": 0.5471961498260498, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -148.90957641601562, "logps_train/policy_1_l": -185.85858154296875, "logps_train/policy_1_w": -91.9522705078125, "logps_train/policy_2_2": -115.2492904663086, "logps_train/policy_2_w": -124.32219696044922, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 2.5738868713378906, "rewards_train/1-l": -2.122380495071411, "rewards_train/1-w": 3.0118048191070557, "rewards_train/2-2": 3.551633596420288, "rewards_train/2-w": 2.1490302085876465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.134185314178467, "rewards_train/margins_1": 0.43791794776916504, "rewards_train/margins_2": 1.4026033878326416, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -184.6431884765625, "logps_train/policy_1_l": -184.3238067626953, "logps_train/policy_1_w": -120.18112182617188, "logps_train/policy_2_2": -152.53277587890625, "logps_train/policy_2_w": -144.38978576660156, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 2.4302124977111816, "rewards_train/1-l": -2.175837516784668, "rewards_train/1-w": 3.465482234954834, "rewards_train/2-2": 3.8146913051605225, "rewards_train/2-w": 2.315708875656128, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.641319751739502, "rewards_train/margins_1": 1.0352697372436523, "rewards_train/margins_2": 1.4989824295043945, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -132.86880493164062, "logps_train/policy_1_l": -88.04579162597656, "logps_train/policy_1_w": -94.54156494140625, "logps_train/policy_2_2": -108.86675262451172, "logps_train/policy_2_w": -124.41317749023438, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.539682149887085, "rewards_train/1-l": -0.7686419486999512, "rewards_train/1-w": 3.2497503757476807, "rewards_train/2-2": 2.3293402194976807, "rewards_train/2-w": 1.6797754764556885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.018392324447632, "rewards_train/margins_1": 1.7100682258605957, "rewards_train/margins_2": 0.6495647430419922, "step": 383 }, { "epoch": 1.15, "learning_rate": 2.1064746305037595e-06, "loss": 0.4134, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -125.17716979980469, "logps_train/policy_1_l": -151.22933959960938, "logps_train/policy_1_w": -112.30229949951172, "logps_train/policy_2_2": -100.39655303955078, "logps_train/policy_2_w": -155.0029296875, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.5947833061218262, "rewards_train/1-l": -2.202620506286621, "rewards_train/1-w": 2.9814882278442383, "rewards_train/2-2": 2.6525325775146484, "rewards_train/2-w": 1.2075185775756836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.184108734130859, "rewards_train/margins_1": 1.386704921722412, "rewards_train/margins_2": 1.4450139999389648, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -114.64470672607422, "logps_train/policy_1_l": -142.1419677734375, "logps_train/policy_1_w": -68.24020385742188, "logps_train/policy_2_2": -96.675048828125, "logps_train/policy_2_w": -83.1411361694336, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.9652166366577148, "rewards_train/1-l": -1.652477502822876, "rewards_train/1-w": 1.9998074769973755, "rewards_train/2-2": 2.537182331085205, "rewards_train/2-w": 1.6370582580566406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6522849798202515, "rewards_train/margins_1": 0.034590840339660645, "rewards_train/margins_2": 0.9001240730285645, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -214.7076873779297, "logps_train/policy_1_l": -165.50357055664062, "logps_train/policy_1_w": -162.5130157470703, "logps_train/policy_2_2": -180.18777465820312, "logps_train/policy_2_w": -188.20462036132812, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.591731071472168, "rewards_train/1-l": -1.541764497756958, "rewards_train/1-w": 4.143230438232422, "rewards_train/2-2": 4.223409175872803, "rewards_train/2-w": 2.8701634407043457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.68499493598938, "rewards_train/margins_1": 1.551499366760254, "rewards_train/margins_2": 1.353245735168457, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -124.19163513183594, "logps_train/policy_1_l": -229.32615661621094, "logps_train/policy_1_w": -138.94277954101562, "logps_train/policy_2_2": -90.23226928710938, "logps_train/policy_2_w": -192.89944458007812, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.8767352104187012, "rewards_train/1-l": -1.712106466293335, "rewards_train/1-w": 2.72603440284729, "rewards_train/2-2": 2.9150547981262207, "rewards_train/2-w": 1.306150197982788, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.438140869140625, "rewards_train/margins_1": 0.8492991924285889, "rewards_train/margins_2": 1.6089046001434326, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -267.58514404296875, "logps_train/policy_1_l": -291.4783630371094, "logps_train/policy_1_w": -191.98513793945312, "logps_train/policy_2_2": -223.01385498046875, "logps_train/policy_2_w": -241.7464599609375, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -264.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.72898530960083, "rewards_train/1-l": -2.235335350036621, "rewards_train/1-w": 4.451486587524414, "rewards_train/2-2": 4.22361421585083, "rewards_train/2-w": 3.0128538608551025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.686821937561035, "rewards_train/margins_1": 1.722501277923584, "rewards_train/margins_2": 1.2107603549957275, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -231.1887664794922, "logps_train/policy_1_l": -170.19512939453125, "logps_train/policy_1_w": -142.6433868408203, "logps_train/policy_2_2": -178.32687377929688, "logps_train/policy_2_w": -179.56405639648438, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.8092484474182129, "rewards_train/1-l": -1.793145775794983, "rewards_train/1-w": 3.5294113159179688, "rewards_train/2-2": 3.09543776512146, "rewards_train/2-w": 2.3857829570770264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.322557091712952, "rewards_train/margins_1": 2.720162868499756, "rewards_train/margins_2": 0.7096548080444336, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -212.59841918945312, "logps_train/policy_1_l": -240.10226440429688, "logps_train/policy_1_w": -173.98599243164062, "logps_train/policy_2_2": -176.03590393066406, "logps_train/policy_2_w": -227.174072265625, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.8464083671569824, "rewards_train/1-l": -3.2094435691833496, "rewards_train/1-w": 4.792024612426758, "rewards_train/2-2": 3.608910322189331, "rewards_train/2-w": 3.0888423919677734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.001468181610107, "rewards_train/margins_1": 2.9456162452697754, "rewards_train/margins_2": 0.5200679302215576, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -149.76222229003906, "logps_train/policy_1_l": -125.61665344238281, "logps_train/policy_1_w": -155.33917236328125, "logps_train/policy_2_2": -111.69644165039062, "logps_train/policy_2_w": -194.96112060546875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.9354972839355469, "rewards_train/1-l": -1.1507281064987183, "rewards_train/1-w": 4.359833717346191, "rewards_train/2-2": 3.4623875617980957, "rewards_train/2-w": 2.69138765335083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.51056182384491, "rewards_train/margins_1": 2.4243364334106445, "rewards_train/margins_2": 0.7709999084472656, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -92.46522521972656, "logps_train/policy_1_l": -208.55117797851562, "logps_train/policy_1_w": -137.3422088623047, "logps_train/policy_2_2": -80.2246322631836, "logps_train/policy_2_w": -161.44232177734375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 2.6066031455993652, "rewards_train/1-l": -2.583242654800415, "rewards_train/1-w": 3.017732620239258, "rewards_train/2-2": 2.8189430236816406, "rewards_train/2-w": 1.828423261642456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.600975275039673, "rewards_train/margins_1": 0.4111294746398926, "rewards_train/margins_2": 0.9905197620391846, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -140.7869873046875, "logps_train/policy_1_l": -159.48529052734375, "logps_train/policy_1_w": -58.08233642578125, "logps_train/policy_2_2": -97.71139526367188, "logps_train/policy_2_w": -83.14434814453125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 0.3994256854057312, "rewards_train/1-l": -2.419623374938965, "rewards_train/1-w": 1.876922845840454, "rewards_train/2-2": 2.5468294620513916, "rewards_train/2-w": 1.032245397567749, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.296546220779419, "rewards_train/margins_1": 1.477497160434723, "rewards_train/margins_2": 1.5145840644836426, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -182.491943359375, "logps_train/policy_1_l": -209.57659912109375, "logps_train/policy_1_w": -163.50714111328125, "logps_train/policy_2_2": -151.01954650878906, "logps_train/policy_2_w": -202.37014770507812, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 3.3476805686950684, "rewards_train/1-l": -2.0623488426208496, "rewards_train/1-w": 2.7945995330810547, "rewards_train/2-2": 4.409764289855957, "rewards_train/2-w": 1.4098608493804932, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.856948375701904, "rewards_train/margins_1": -0.5530810356140137, "rewards_train/margins_2": 2.999903440475464, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -141.0159912109375, "logps_train/policy_1_l": -142.40179443359375, "logps_train/policy_1_w": -103.73245239257812, "logps_train/policy_2_2": -106.8348388671875, "logps_train/policy_2_w": -127.12643432617188, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2382451295852661, "rewards_train/1-l": -2.381585121154785, "rewards_train/1-w": 3.44081711769104, "rewards_train/2-2": 2.4751105308532715, "rewards_train/2-w": 2.2748565673828125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.822402238845825, "rewards_train/margins_1": 2.202571988105774, "rewards_train/margins_2": 0.20025396347045898, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -74.3333740234375, "logps_train/policy_1_l": -109.80789184570312, "logps_train/policy_1_w": -74.82647705078125, "logps_train/policy_2_2": -56.974586486816406, "logps_train/policy_2_w": -105.13729858398438, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.962365984916687, "rewards_train/1-l": -2.2686805725097656, "rewards_train/1-w": 3.4079771041870117, "rewards_train/2-2": 2.5099635124206543, "rewards_train/2-w": 2.392519950866699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.676657676696777, "rewards_train/margins_1": 1.4456111192703247, "rewards_train/margins_2": 0.11744356155395508, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -142.11260986328125, "logps_train/policy_1_l": -134.92706298828125, "logps_train/policy_1_w": -101.89196014404297, "logps_train/policy_2_2": -113.95553588867188, "logps_train/policy_2_w": -128.27120971679688, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.056707739830017, "rewards_train/1-l": -1.5255184173583984, "rewards_train/1-w": 3.293616533279419, "rewards_train/2-2": 2.1970250606536865, "rewards_train/2-w": 2.343191146850586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.819134950637817, "rewards_train/margins_1": 2.236908793449402, "rewards_train/margins_2": -0.14616608619689941, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -184.43087768554688, "logps_train/policy_1_l": -217.5712127685547, "logps_train/policy_1_w": -134.90432739257812, "logps_train/policy_2_2": -152.931884765625, "logps_train/policy_2_w": -185.42086791992188, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.056131601333618, "rewards_train/1-l": -2.3791913986206055, "rewards_train/1-w": 3.8970677852630615, "rewards_train/2-2": 2.9982175827026367, "rewards_train/2-w": 2.5016632080078125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.276259183883667, "rewards_train/margins_1": 1.8409361839294434, "rewards_train/margins_2": 0.4965543746948242, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -142.39718627929688, "logps_train/policy_1_l": -161.94729614257812, "logps_train/policy_1_w": -103.79301452636719, "logps_train/policy_2_2": -116.5724868774414, "logps_train/policy_2_w": -133.5511016845703, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 2.260281801223755, "rewards_train/1-l": -1.9759793281555176, "rewards_train/1-w": 3.2269492149353027, "rewards_train/2-2": 3.3052520751953125, "rewards_train/2-w": 2.1527023315429688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.20292854309082, "rewards_train/margins_1": 0.9666674137115479, "rewards_train/margins_2": 1.1525497436523438, "step": 385 }, { "epoch": 1.16, "learning_rate": 2.0821040783960422e-06, "loss": 0.4132, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -80.36068725585938, "logps_train/policy_1_l": -160.45138549804688, "logps_train/policy_1_w": -75.12466430664062, "logps_train/policy_2_2": -59.37759780883789, "logps_train/policy_2_w": -99.45877838134766, "logps_train/ref_1_2": -94.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.3576816320419312, "rewards_train/1-l": -2.624631404876709, "rewards_train/1-w": 3.6137051582336426, "rewards_train/2-2": 2.064584255218506, "rewards_train/2-w": 2.6943564414978027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 6.238336563110352, "rewards_train/margins_1": 2.2560235261917114, "rewards_train/margins_2": -0.6297721862792969, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -189.67347717285156, "logps_train/policy_1_l": -220.0118408203125, "logps_train/policy_1_w": -128.93255615234375, "logps_train/policy_2_2": -144.1791229248047, "logps_train/policy_2_w": -171.08709716796875, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.072495460510254, "rewards_train/1-l": -2.0808706283569336, "rewards_train/1-w": 3.6145570278167725, "rewards_train/2-2": 3.350057363510132, "rewards_train/2-w": 2.6866021156311035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.695427656173706, "rewards_train/margins_1": 1.5420615673065186, "rewards_train/margins_2": 0.6634552478790283, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -121.11288452148438, "logps_train/policy_1_l": -163.9480743408203, "logps_train/policy_1_w": -158.30911254882812, "logps_train/policy_2_2": -98.44088745117188, "logps_train/policy_2_w": -190.79672241210938, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.5321680307388306, "rewards_train/1-l": -1.5597491264343262, "rewards_train/1-w": 3.5317840576171875, "rewards_train/2-2": 2.4101107120513916, "rewards_train/2-w": 2.2257962226867676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.091533184051514, "rewards_train/margins_1": 1.999616026878357, "rewards_train/margins_2": 0.18431448936462402, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -98.13319396972656, "logps_train/policy_1_l": -110.71516418457031, "logps_train/policy_1_w": -78.22628784179688, "logps_train/policy_2_2": -83.0511703491211, "logps_train/policy_2_w": -97.46202087402344, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.7370715141296387, "rewards_train/1-l": -1.4480793476104736, "rewards_train/1-w": 2.231130838394165, "rewards_train/2-2": 2.2620699405670166, "rewards_train/2-w": 1.4010145664215088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6792101860046387, "rewards_train/margins_1": 0.49405932426452637, "rewards_train/margins_2": 0.8610553741455078, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -168.84471130371094, "logps_train/policy_1_l": -197.3514862060547, "logps_train/policy_1_w": -110.38272094726562, "logps_train/policy_2_2": -142.63705444335938, "logps_train/policy_2_w": -135.859130859375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 2.265528917312622, "rewards_train/1-l": -2.587491989135742, "rewards_train/1-w": 3.5836031436920166, "rewards_train/2-2": 3.33160662651062, "rewards_train/2-w": 2.8515868186950684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.171095132827759, "rewards_train/margins_1": 1.3180742263793945, "rewards_train/margins_2": 0.48001980781555176, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -141.58349609375, "logps_train/policy_1_l": -210.53475952148438, "logps_train/policy_1_w": -93.79597473144531, "logps_train/policy_2_2": -104.87101745605469, "logps_train/policy_2_w": -122.3118667602539, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.0385258197784424, "rewards_train/1-l": -2.114414691925049, "rewards_train/1-w": 2.459465503692627, "rewards_train/2-2": 2.328523874282837, "rewards_train/2-w": 1.4516260623931885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.573880195617676, "rewards_train/margins_1": 1.4209396839141846, "rewards_train/margins_2": 0.8768978118896484, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -197.1630096435547, "logps_train/policy_1_l": -208.6416778564453, "logps_train/policy_1_w": -134.0847930908203, "logps_train/policy_2_2": -161.39256286621094, "logps_train/policy_2_w": -174.56594848632812, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.9633867740631104, "rewards_train/1-l": -2.5719809532165527, "rewards_train/1-w": 3.5946459770202637, "rewards_train/2-2": 3.5357437133789062, "rewards_train/2-w": 1.8871561288833618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.166626930236816, "rewards_train/margins_1": 1.6312592029571533, "rewards_train/margins_2": 1.6485875844955444, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -69.63140869140625, "logps_train/policy_1_l": -68.86214447021484, "logps_train/policy_1_w": -58.35297393798828, "logps_train/policy_2_2": -49.66526794433594, "logps_train/policy_2_w": -83.05735778808594, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 1.527484655380249, "rewards_train/1-l": -1.827425241470337, "rewards_train/1-w": 2.1456594467163086, "rewards_train/2-2": 2.199098587036133, "rewards_train/2-w": 1.3507099151611328, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9730846881866455, "rewards_train/margins_1": 0.6181747913360596, "rewards_train/margins_2": 0.848388671875, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -207.5977783203125, "logps_train/policy_1_l": -252.9172821044922, "logps_train/policy_1_w": -114.01824951171875, "logps_train/policy_2_2": -160.617431640625, "logps_train/policy_2_w": -146.73733520507812, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.5370965003967285, "rewards_train/1-l": -2.338993549346924, "rewards_train/1-w": 2.5770809650421143, "rewards_train/2-2": 3.3648200035095215, "rewards_train/2-w": 1.471187949180603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.916074514389038, "rewards_train/margins_1": 1.0399844646453857, "rewards_train/margins_2": 1.8936320543289185, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -183.94422912597656, "logps_train/policy_1_l": -271.8995666503906, "logps_train/policy_1_w": -104.31278991699219, "logps_train/policy_2_2": -137.8220672607422, "logps_train/policy_2_w": -137.98687744140625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.755577564239502, "rewards_train/1-l": -4.010659217834473, "rewards_train/1-w": 2.5937209129333496, "rewards_train/2-2": 3.374067544937134, "rewards_train/2-w": 1.618499517440796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.604380130767822, "rewards_train/margins_1": 0.8381433486938477, "rewards_train/margins_2": 1.755568027496338, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -181.43316650390625, "logps_train/policy_1_l": -214.5253143310547, "logps_train/policy_1_w": -191.78497314453125, "logps_train/policy_2_2": -152.60836791992188, "logps_train/policy_2_w": -222.39984130859375, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.8648866415023804, "rewards_train/1-l": -2.8283133506774902, "rewards_train/1-w": 3.75040864944458, "rewards_train/2-2": 2.861233949661255, "rewards_train/2-w": 2.560798168182373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.57872200012207, "rewards_train/margins_1": 1.8855220079421997, "rewards_train/margins_2": 0.30043578147888184, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -56.14788055419922, "logps_train/policy_1_l": -81.12644958496094, "logps_train/policy_1_w": -87.06784057617188, "logps_train/policy_2_2": -41.46346664428711, "logps_train/policy_2_w": -122.79107666015625, "logps_train/ref_1_2": -64.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -56.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.8035712838172913, "rewards_train/1-l": -1.0282700061798096, "rewards_train/1-w": 1.8621609210968018, "rewards_train/2-2": 1.4785557985305786, "rewards_train/2-w": 0.42401835322380066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8904309272766113, "rewards_train/margins_1": 1.0585896372795105, "rewards_train/margins_2": 1.054537445306778, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -169.15081787109375, "logps_train/policy_1_l": -131.39773559570312, "logps_train/policy_1_w": -110.47374725341797, "logps_train/policy_2_2": -127.50619506835938, "logps_train/policy_2_w": -154.24041748046875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.7177308201789856, "rewards_train/1-l": -1.646024465560913, "rewards_train/1-w": 3.1073124408721924, "rewards_train/2-2": 2.499380588531494, "rewards_train/2-w": 1.132207989692688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7533369064331055, "rewards_train/margins_1": 2.389581620693207, "rewards_train/margins_2": 1.3671725988388062, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -146.7145233154297, "logps_train/policy_1_l": -143.43365478515625, "logps_train/policy_1_w": -60.000572204589844, "logps_train/policy_2_2": -109.19896697998047, "logps_train/policy_2_w": -92.9690170288086, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 0.8168293237686157, "rewards_train/1-l": -1.992193341255188, "rewards_train/1-w": 2.037443161010742, "rewards_train/2-2": 2.580103635787964, "rewards_train/2-w": 0.9155985116958618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.02963650226593, "rewards_train/margins_1": 1.2206138372421265, "rewards_train/margins_2": 1.664505124092102, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -125.60092163085938, "logps_train/policy_1_l": -155.053955078125, "logps_train/policy_1_w": -121.34271240234375, "logps_train/policy_2_2": -86.30240631103516, "logps_train/policy_2_w": -155.45700073242188, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.6860010623931885, "rewards_train/1-l": -1.8647704124450684, "rewards_train/1-w": 2.9094786643981934, "rewards_train/2-2": 2.641634941101074, "rewards_train/2-w": 1.4699256420135498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.774249076843262, "rewards_train/margins_1": 1.2234776020050049, "rewards_train/margins_2": 1.1717092990875244, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -83.05288696289062, "logps_train/policy_1_l": -119.53520965576172, "logps_train/policy_1_w": -106.4786605834961, "logps_train/policy_2_2": -62.03125762939453, "logps_train/policy_2_w": -150.34829711914062, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.6868987083435059, "rewards_train/1-l": -1.8468801975250244, "rewards_train/1-w": 3.0630714893341064, "rewards_train/2-2": 2.2562496662139893, "rewards_train/2-w": 1.3378269672393799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.909951686859131, "rewards_train/margins_1": 1.3761727809906006, "rewards_train/margins_2": 0.9184226989746094, "step": 387 }, { "epoch": 1.16, "learning_rate": 2.057774312193568e-06, "loss": 0.4734, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -120.84777069091797, "logps_train/policy_1_l": -164.81411743164062, "logps_train/policy_1_w": -121.14625549316406, "logps_train/policy_2_2": -91.20024108886719, "logps_train/policy_2_w": -165.01904296875, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.596473217010498, "rewards_train/1-l": -1.9702799320220947, "rewards_train/1-w": 2.3205313682556152, "rewards_train/2-2": 2.687788486480713, "rewards_train/2-w": 1.1016122102737427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.29081130027771, "rewards_train/margins_1": 0.7240581512451172, "rewards_train/margins_2": 1.5861762762069702, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -149.05096435546875, "logps_train/policy_1_l": -128.532470703125, "logps_train/policy_1_w": -123.61923217773438, "logps_train/policy_2_2": -106.61669921875, "logps_train/policy_2_w": -158.3197021484375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.449590802192688, "rewards_train/1-l": -1.6114490032196045, "rewards_train/1-w": 2.768545627593994, "rewards_train/2-2": 2.2430171966552734, "rewards_train/2-w": 1.3680298328399658, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.379994630813599, "rewards_train/margins_1": 2.318954825401306, "rewards_train/margins_2": 0.8749873638153076, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -135.99176025390625, "logps_train/policy_1_l": -85.31575012207031, "logps_train/policy_1_w": -81.142578125, "logps_train/policy_2_2": -99.89579772949219, "logps_train/policy_2_w": -118.66596221923828, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.7691848278045654, "rewards_train/1-l": -1.2206377983093262, "rewards_train/1-w": 3.062304973602295, "rewards_train/2-2": 2.7197952270507812, "rewards_train/2-w": 1.3927791118621826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.282942771911621, "rewards_train/margins_1": 2.2931201457977295, "rewards_train/margins_2": 1.3270161151885986, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -160.48361206054688, "logps_train/policy_1_l": -201.87083435058594, "logps_train/policy_1_w": -143.33132934570312, "logps_train/policy_2_2": -115.53264617919922, "logps_train/policy_2_w": -195.68057250976562, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.0094499588012695, "rewards_train/1-l": -2.0825915336608887, "rewards_train/1-w": 3.7207725048065186, "rewards_train/2-2": 3.826423168182373, "rewards_train/2-w": 2.0381920337677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.803364038467407, "rewards_train/margins_1": 1.711322546005249, "rewards_train/margins_2": 1.7882311344146729, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -121.6360092163086, "logps_train/policy_1_l": -135.05313110351562, "logps_train/policy_1_w": -106.4765625, "logps_train/policy_2_2": -94.61128997802734, "logps_train/policy_2_w": -137.4148406982422, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.4448961019515991, "rewards_train/1-l": -2.482633113861084, "rewards_train/1-w": 2.814453125, "rewards_train/2-2": 2.355131149291992, "rewards_train/2-w": 1.2999224662780762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.297086238861084, "rewards_train/margins_1": 1.3695570230484009, "rewards_train/margins_2": 1.055208683013916, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -253.83294677734375, "logps_train/policy_1_l": -197.86651611328125, "logps_train/policy_1_w": -123.78556823730469, "logps_train/policy_2_2": -194.52511596679688, "logps_train/policy_2_w": -171.13583374023438, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.7573308944702148, "rewards_train/1-l": -2.7585268020629883, "rewards_train/1-w": 3.449568748474121, "rewards_train/2-2": 4.053739547729492, "rewards_train/2-w": 1.8739166259765625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.208095550537109, "rewards_train/margins_1": 1.6922378540039062, "rewards_train/margins_2": 2.1798229217529297, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -128.97988891601562, "logps_train/policy_1_l": -128.02041625976562, "logps_train/policy_1_w": -51.91041946411133, "logps_train/policy_2_2": -105.63471221923828, "logps_train/policy_2_w": -66.994873046875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -76.5, "rewards_train/1-2": 1.3020117282867432, "rewards_train/1-l": -1.9887595176696777, "rewards_train/1-w": 1.399583101272583, "rewards_train/2-2": 2.083404064178467, "rewards_train/2-w": 0.9530519247055054, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3883426189422607, "rewards_train/margins_1": 0.09757137298583984, "rewards_train/margins_2": 1.1303521394729614, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -76.45283508300781, "logps_train/policy_1_l": -109.61703491210938, "logps_train/policy_1_w": -105.92265319824219, "logps_train/policy_2_2": -67.25718688964844, "logps_train/policy_2_w": -131.49371337890625, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.3816696405410767, "rewards_train/1-l": -1.1538915634155273, "rewards_train/1-w": 2.5835156440734863, "rewards_train/2-2": 1.6465466022491455, "rewards_train/2-w": 1.4982845783233643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7374072074890137, "rewards_train/margins_1": 1.2018460035324097, "rewards_train/margins_2": 0.14826202392578125, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -166.8225555419922, "logps_train/policy_1_l": -223.09896850585938, "logps_train/policy_1_w": -185.37911987304688, "logps_train/policy_2_2": -136.1532440185547, "logps_train/policy_2_w": -223.97592163085938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.7833694219589233, "rewards_train/1-l": -2.672445297241211, "rewards_train/1-w": 3.4074015617370605, "rewards_train/2-2": 2.817488670349121, "rewards_train/2-w": 1.6313142776489258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.0798468589782715, "rewards_train/margins_1": 1.6240321397781372, "rewards_train/margins_2": 1.1861743927001953, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -202.95118713378906, "logps_train/policy_1_l": -237.80978393554688, "logps_train/policy_1_w": -155.12139892578125, "logps_train/policy_2_2": -152.4467315673828, "logps_train/policy_2_w": -210.85574340820312, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.4469709396362305, "rewards_train/1-l": -3.325509786605835, "rewards_train/1-w": 4.212859153747559, "rewards_train/2-2": 3.3211469650268555, "rewards_train/2-w": 1.9394252300262451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.5383689403533936, "rewards_train/margins_1": 2.765888214111328, "rewards_train/margins_2": 1.3817217350006104, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -147.84976196289062, "logps_train/policy_1_l": -125.98049926757812, "logps_train/policy_1_w": -178.1619873046875, "logps_train/policy_2_2": -114.01212310791016, "logps_train/policy_2_w": -244.47357177734375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.8064299821853638, "rewards_train/1-l": -1.1035182476043701, "rewards_train/1-w": 3.984095335006714, "rewards_train/2-2": 2.856599807739258, "rewards_train/2-w": 1.0432679653167725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.087613582611084, "rewards_train/margins_1": 2.17766535282135, "rewards_train/margins_2": 1.8133318424224854, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -79.49940490722656, "logps_train/policy_1_l": -49.970882415771484, "logps_train/policy_1_w": -38.70825958251953, "logps_train/policy_2_2": -61.587589263916016, "logps_train/policy_2_w": -59.800086975097656, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -40.75, "logps_train/ref_1_w": -62.5, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -73.0, "rewards_train/1-2": 0.9132921695709229, "rewards_train/1-l": -0.9275084733963013, "rewards_train/1-w": 2.36794376373291, "rewards_train/2-2": 1.5887507200241089, "rewards_train/2-w": 1.3198938369750977, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2954522371292114, "rewards_train/margins_1": 1.4546515941619873, "rewards_train/margins_2": 0.26885688304901123, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -117.03022766113281, "logps_train/policy_1_l": -102.94554901123047, "logps_train/policy_1_w": -70.79203796386719, "logps_train/policy_2_2": -79.0262222290039, "logps_train/policy_2_w": -101.55591583251953, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.001664638519287, "rewards_train/1-l": -1.0762206315994263, "rewards_train/1-w": 2.014326333999634, "rewards_train/2-2": 2.2985496520996094, "rewards_train/2-w": 0.9464596509933472, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.09054696559906, "rewards_train/margins_1": 1.0126616954803467, "rewards_train/margins_2": 1.3520900011062622, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -158.38975524902344, "logps_train/policy_1_l": -158.99114990234375, "logps_train/policy_1_w": -107.5330810546875, "logps_train/policy_2_2": -114.69590759277344, "logps_train/policy_2_w": -151.90240478515625, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.942274808883667, "rewards_train/1-l": -2.3840761184692383, "rewards_train/1-w": 2.568957805633545, "rewards_train/2-2": 3.518690586090088, "rewards_train/2-w": 0.9937443733215332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.953033924102783, "rewards_train/margins_1": 0.6266829967498779, "rewards_train/margins_2": 2.5249462127685547, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -140.64540100097656, "logps_train/policy_1_l": -117.44032287597656, "logps_train/policy_1_w": -50.17873001098633, "logps_train/policy_2_2": -109.27588653564453, "logps_train/policy_2_w": -71.71797180175781, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.6995224356651306, "rewards_train/1-l": -2.2807512283325195, "rewards_train/1-w": 2.619626998901367, "rewards_train/2-2": 2.159911870956421, "rewards_train/2-w": 1.6805466413497925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.900378227233887, "rewards_train/margins_1": 1.9201045632362366, "rewards_train/margins_2": 0.4793652296066284, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -263.265380859375, "logps_train/policy_1_l": -214.03280639648438, "logps_train/policy_1_w": -128.84268188476562, "logps_train/policy_2_2": -211.25941467285156, "logps_train/policy_2_w": -175.34686279296875, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 2.318773031234741, "rewards_train/1-l": -1.8118736743927002, "rewards_train/1-w": 4.5235443115234375, "rewards_train/2-2": 4.3717145919799805, "rewards_train/2-w": 2.7059383392333984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.335417985916138, "rewards_train/margins_1": 2.2047712802886963, "rewards_train/margins_2": 1.665776252746582, "step": 389 }, { "epoch": 1.17, "learning_rate": 2.0334877064386277e-06, "loss": 0.5571, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -188.83230590820312, "logps_train/policy_1_l": -244.8863983154297, "logps_train/policy_1_w": -164.56727600097656, "logps_train/policy_2_2": -149.28297424316406, "logps_train/policy_2_w": -205.60470581054688, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 2.0683321952819824, "rewards_train/1-l": -3.497232675552368, "rewards_train/1-w": 3.481553554534912, "rewards_train/2-2": 3.4357645511627197, "rewards_train/2-w": 1.9832791090011597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.97878623008728, "rewards_train/margins_1": 1.4132213592529297, "rewards_train/margins_2": 1.45248544216156, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -160.1285400390625, "logps_train/policy_1_l": -150.040283203125, "logps_train/policy_1_w": -127.7193603515625, "logps_train/policy_2_2": -121.2763671875, "logps_train/policy_2_w": -170.36257934570312, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.7465212941169739, "rewards_train/1-l": -2.2489492893218994, "rewards_train/1-w": 2.7361693382263184, "rewards_train/2-2": 2.4333009719848633, "rewards_train/2-w": 1.1231170892715454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.985118627548218, "rewards_train/margins_1": 1.9896480441093445, "rewards_train/margins_2": 1.3101838827133179, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -116.72933197021484, "logps_train/policy_1_l": -94.65631866455078, "logps_train/policy_1_w": -99.20368194580078, "logps_train/policy_2_2": -96.00782012939453, "logps_train/policy_2_w": -129.4107666015625, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.8926923274993896, "rewards_train/1-l": -1.1924382448196411, "rewards_train/1-w": 3.1905694007873535, "rewards_train/2-2": 2.83046817779541, "rewards_train/2-w": 1.9464226961135864, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.383007645606995, "rewards_train/margins_1": 1.2978770732879639, "rewards_train/margins_2": 0.8840454816818237, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -81.32656860351562, "logps_train/policy_1_l": -102.70596313476562, "logps_train/policy_1_w": -73.09780883789062, "logps_train/policy_2_2": -67.48261260986328, "logps_train/policy_2_w": -96.47307586669922, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.5610929727554321, "rewards_train/1-l": -1.7823154926300049, "rewards_train/1-w": 2.42928147315979, "rewards_train/2-2": 2.1040825843811035, "rewards_train/2-w": 1.6870672702789307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.211596965789795, "rewards_train/margins_1": 0.8681885004043579, "rewards_train/margins_2": 0.41701531410217285, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -183.8640899658203, "logps_train/policy_1_l": -279.830322265625, "logps_train/policy_1_w": -175.302490234375, "logps_train/policy_2_2": -154.76058959960938, "logps_train/policy_2_w": -235.87570190429688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -255.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.0214028358459473, "rewards_train/1-l": -2.5049071311950684, "rewards_train/1-w": 3.688499927520752, "rewards_train/2-2": 3.1676902770996094, "rewards_train/2-w": 1.656180739402771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.19340705871582, "rewards_train/margins_1": 1.6670970916748047, "rewards_train/margins_2": 1.5115095376968384, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -95.60810852050781, "logps_train/policy_1_l": -77.67349243164062, "logps_train/policy_1_w": -68.80802154541016, "logps_train/policy_2_2": -70.08636474609375, "logps_train/policy_2_w": -90.20921325683594, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 1.1688761711120605, "rewards_train/1-l": -1.305044174194336, "rewards_train/1-w": 2.5723228454589844, "rewards_train/2-2": 2.084137201309204, "rewards_train/2-w": 1.892359733581543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8773670196533203, "rewards_train/margins_1": 1.4034466743469238, "rewards_train/margins_2": 0.19177746772766113, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -142.68807983398438, "logps_train/policy_1_l": -152.96249389648438, "logps_train/policy_1_w": -183.32362365722656, "logps_train/policy_2_2": -110.953125, "logps_train/policy_2_w": -243.83419799804688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 2.334317207336426, "rewards_train/1-l": -1.4464436769485474, "rewards_train/1-w": 4.005527496337891, "rewards_train/2-2": 3.31093692779541, "rewards_train/2-w": 1.1454862356185913, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.451971173286438, "rewards_train/margins_1": 1.6712102890014648, "rewards_train/margins_2": 2.165450692176819, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -172.34994506835938, "logps_train/policy_1_l": -272.2655029296875, "logps_train/policy_1_w": -163.11312866210938, "logps_train/policy_2_2": -139.60015869140625, "logps_train/policy_2_w": -229.5577850341797, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.277505397796631, "rewards_train/1-l": -2.507797956466675, "rewards_train/1-w": 3.832437753677368, "rewards_train/2-2": 3.433734893798828, "rewards_train/2-w": 1.7067222595214844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.340235710144043, "rewards_train/margins_1": 1.5549323558807373, "rewards_train/margins_2": 1.7270126342773438, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -107.27145385742188, "logps_train/policy_1_l": -138.14813232421875, "logps_train/policy_1_w": -100.98326110839844, "logps_train/policy_2_2": -77.23768615722656, "logps_train/policy_2_w": -143.03765869140625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8892607688903809, "rewards_train/1-l": -2.1427435874938965, "rewards_train/1-w": 2.9497203826904297, "rewards_train/2-2": 2.8090436458587646, "rewards_train/2-w": 1.1235785484313965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.092463970184326, "rewards_train/margins_1": 1.0604596138000488, "rewards_train/margins_2": 1.6854650974273682, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -127.86238098144531, "logps_train/policy_1_l": -181.48281860351562, "logps_train/policy_1_w": -128.15200805664062, "logps_train/policy_2_2": -97.8713607788086, "logps_train/policy_2_w": -161.61700439453125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.737199306488037, "rewards_train/1-l": -2.244081497192383, "rewards_train/1-w": 2.8457350730895996, "rewards_train/2-2": 2.8456761837005615, "rewards_train/2-w": 1.2327333688735962, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.089816570281982, "rewards_train/margins_1": 1.1085357666015625, "rewards_train/margins_2": 1.6129428148269653, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -204.742919921875, "logps_train/policy_1_l": -194.63699340820312, "logps_train/policy_1_w": -117.02545166015625, "logps_train/policy_2_2": -154.95144653320312, "logps_train/policy_2_w": -156.11428833007812, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.5569570064544678, "rewards_train/1-l": -2.2008092403411865, "rewards_train/1-w": 3.3537049293518066, "rewards_train/2-2": 3.279855251312256, "rewards_train/2-w": 2.297945261001587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.554514169692993, "rewards_train/margins_1": 1.7967479228973389, "rewards_train/margins_2": 0.981909990310669, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -124.0186767578125, "logps_train/policy_1_l": -142.11453247070312, "logps_train/policy_1_w": -67.42388153076172, "logps_train/policy_2_2": -85.14875793457031, "logps_train/policy_2_w": -106.02436828613281, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.9051636457443237, "rewards_train/1-l": -1.860673189163208, "rewards_train/1-w": 2.3798775672912598, "rewards_train/2-2": 2.058171033859253, "rewards_train/2-w": 0.9069381952285767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.240550756454468, "rewards_train/margins_1": 1.474713921546936, "rewards_train/margins_2": 1.1512328386306763, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -186.1261749267578, "logps_train/policy_1_l": -321.2212219238281, "logps_train/policy_1_w": -176.19448852539062, "logps_train/policy_2_2": -136.19290161132812, "logps_train/policy_2_w": -228.09596252441406, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -278.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 1.5717575550079346, "rewards_train/1-l": -4.381497383117676, "rewards_train/1-w": 3.702425956726074, "rewards_train/2-2": 2.8533668518066406, "rewards_train/2-w": 1.4388409852981567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.08392333984375, "rewards_train/margins_1": 2.1306684017181396, "rewards_train/margins_2": 1.4145258665084839, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -197.53689575195312, "logps_train/policy_1_l": -198.40855407714844, "logps_train/policy_1_w": -166.56809997558594, "logps_train/policy_2_2": -167.4539031982422, "logps_train/policy_2_w": -199.99053955078125, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.466622829437256, "rewards_train/1-l": -2.3791375160217285, "rewards_train/1-w": 3.639284133911133, "rewards_train/2-2": 3.6655478477478027, "rewards_train/2-w": 2.2165722846984863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.018421649932861, "rewards_train/margins_1": 1.172661304473877, "rewards_train/margins_2": 1.4489755630493164, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -146.2607421875, "logps_train/policy_1_l": -167.54888916015625, "logps_train/policy_1_w": -113.8502197265625, "logps_train/policy_2_2": -112.736083984375, "logps_train/policy_2_w": -159.4880828857422, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.9379398822784424, "rewards_train/1-l": -2.102544069290161, "rewards_train/1-w": 3.2989625930786133, "rewards_train/2-2": 3.1957762241363525, "rewards_train/2-w": 1.6312692165374756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.401506662368774, "rewards_train/margins_1": 1.361022710800171, "rewards_train/margins_2": 1.564507007598877, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -152.28533935546875, "logps_train/policy_1_l": -212.71273803710938, "logps_train/policy_1_w": -136.37188720703125, "logps_train/policy_2_2": -128.3227081298828, "logps_train/policy_2_w": -162.886962890625, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.8699036836624146, "rewards_train/1-l": -2.9482264518737793, "rewards_train/1-w": 2.444404363632202, "rewards_train/2-2": 2.673980236053467, "rewards_train/2-w": 1.585521936416626, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.3926308155059814, "rewards_train/margins_1": 0.5745006799697876, "rewards_train/margins_2": 1.0884582996368408, "step": 391 }, { "epoch": 1.17, "learning_rate": 2.009246631461129e-06, "loss": 0.3621, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -106.33442687988281, "logps_train/policy_1_l": -85.26631927490234, "logps_train/policy_1_w": -44.632110595703125, "logps_train/policy_2_2": -89.6636962890625, "logps_train/policy_2_w": -61.5790901184082, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -58.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -65.0, "rewards_train/1-2": 1.7665574550628662, "rewards_train/1-l": -1.3600305318832397, "rewards_train/1-w": 1.3324923515319824, "rewards_train/2-2": 2.44183349609375, "rewards_train/2-w": 0.3295910060405731, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.692522883415222, "rewards_train/margins_1": -0.4340651035308838, "rewards_train/margins_2": 2.112242490053177, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -116.6393051147461, "logps_train/policy_1_l": -157.10726928710938, "logps_train/policy_1_w": -120.19792175292969, "logps_train/policy_2_2": -85.41683959960938, "logps_train/policy_2_w": -166.99786376953125, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.4001319408416748, "rewards_train/1-l": -2.6794755458831787, "rewards_train/1-w": 4.0645833015441895, "rewards_train/2-2": 2.513003349304199, "rewards_train/2-w": 1.8502135276794434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.744058847427368, "rewards_train/margins_1": 2.6644513607025146, "rewards_train/margins_2": 0.6627898216247559, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -184.97177124023438, "logps_train/policy_1_l": -131.91455078125, "logps_train/policy_1_w": -127.74778747558594, "logps_train/policy_2_2": -157.74752807617188, "logps_train/policy_2_w": -160.98519897460938, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.998136043548584, "rewards_train/1-l": -1.8767094612121582, "rewards_train/1-w": 2.8627216815948486, "rewards_train/2-2": 3.408841371536255, "rewards_train/2-w": 1.7608551979064941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.739431142807007, "rewards_train/margins_1": 0.8645856380462646, "rewards_train/margins_2": 1.6479861736297607, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -92.30656433105469, "logps_train/policy_1_l": -101.25543212890625, "logps_train/policy_1_w": -67.75547790527344, "logps_train/policy_2_2": -70.36446380615234, "logps_train/policy_2_w": -100.14241027832031, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 1.0521568059921265, "rewards_train/1-l": -1.8829654455184937, "rewards_train/1-w": 2.786952018737793, "rewards_train/2-2": 1.8217566013336182, "rewards_train/2-w": 1.417008876800537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.669917464256287, "rewards_train/margins_1": 1.7347952127456665, "rewards_train/margins_2": 0.40474772453308105, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -143.9246826171875, "logps_train/policy_1_l": -106.97380065917969, "logps_train/policy_1_w": -125.78657531738281, "logps_train/policy_2_2": -115.3800277709961, "logps_train/policy_2_w": -169.37924194335938, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1708126068115234, "rewards_train/1-l": -1.4126642942428589, "rewards_train/1-w": 3.0791549682617188, "rewards_train/2-2": 2.188169002532959, "rewards_train/2-w": 1.0331681966781616, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.491819262504578, "rewards_train/margins_1": 1.9083423614501953, "rewards_train/margins_2": 1.1550008058547974, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -211.15219116210938, "logps_train/policy_1_l": -277.69683837890625, "logps_train/policy_1_w": -111.87537384033203, "logps_train/policy_2_2": -166.94241333007812, "logps_train/policy_2_w": -148.19827270507812, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.2910300493240356, "rewards_train/1-l": -3.427494764328003, "rewards_train/1-w": 2.6972291469573975, "rewards_train/2-2": 3.1526336669921875, "rewards_train/2-w": 1.3391563892364502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1247239112854, "rewards_train/margins_1": 1.4061990976333618, "rewards_train/margins_2": 1.8134772777557373, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -108.7646255493164, "logps_train/policy_1_l": -123.51005554199219, "logps_train/policy_1_w": -93.59232330322266, "logps_train/policy_2_2": -85.87754821777344, "logps_train/policy_2_w": -116.61560821533203, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.7520527839660645, "rewards_train/1-l": -2.8799118995666504, "rewards_train/1-w": 2.7751429080963135, "rewards_train/2-2": 2.838808059692383, "rewards_train/2-w": 1.7587518692016602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.655054807662964, "rewards_train/margins_1": 1.023090124130249, "rewards_train/margins_2": 1.0800561904907227, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -139.55831909179688, "logps_train/policy_1_l": -181.2159423828125, "logps_train/policy_1_w": -129.83348083496094, "logps_train/policy_2_2": -107.47364807128906, "logps_train/policy_2_w": -185.59059143066406, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.5308873653411865, "rewards_train/1-l": -2.7176880836486816, "rewards_train/1-w": 3.497901439666748, "rewards_train/2-2": 2.8311502933502197, "rewards_train/2-w": 1.7182846069335938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.21558952331543, "rewards_train/margins_1": 1.9670140743255615, "rewards_train/margins_2": 1.112865686416626, "step": 392 }, { "epoch": 1.18, "logps_train/policy_1_2": -146.72703552246094, "logps_train/policy_1_l": -213.21572875976562, "logps_train/policy_1_w": -113.50067138671875, "logps_train/policy_2_2": -110.0308609008789, "logps_train/policy_2_w": -161.41978454589844, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.2179220914840698, "rewards_train/1-l": -3.073915958404541, "rewards_train/1-w": 2.6249332427978516, "rewards_train/2-2": 2.5969133377075195, "rewards_train/2-w": 0.8017717599868774, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.698849201202393, "rewards_train/margins_1": 1.4070111513137817, "rewards_train/margins_2": 1.795141577720642, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -127.32872009277344, "logps_train/policy_1_l": -164.2261199951172, "logps_train/policy_1_w": -79.09640502929688, "logps_train/policy_2_2": -95.945556640625, "logps_train/policy_2_w": -113.29981994628906, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.071815013885498, "rewards_train/1-l": -2.510307788848877, "rewards_train/1-w": 2.15832781791687, "rewards_train/2-2": 2.2640380859375, "rewards_train/2-w": 0.7887675762176514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.668635606765747, "rewards_train/margins_1": 1.086512804031372, "rewards_train/margins_2": 1.4752705097198486, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -160.8861541748047, "logps_train/policy_1_l": -233.9813690185547, "logps_train/policy_1_w": -130.96807861328125, "logps_train/policy_2_2": -128.1610107421875, "logps_train/policy_2_w": -177.29904174804688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.5363843441009521, "rewards_train/1-l": -4.032510757446289, "rewards_train/1-w": 3.458660364151001, "rewards_train/2-2": 2.562023639678955, "rewards_train/2-w": 1.4372824430465698, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.49117112159729, "rewards_train/margins_1": 1.9222760200500488, "rewards_train/margins_2": 1.1247411966323853, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -120.79335021972656, "logps_train/policy_1_l": -135.76898193359375, "logps_train/policy_1_w": -76.34668731689453, "logps_train/policy_2_2": -90.85984802246094, "logps_train/policy_2_w": -106.82964324951172, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.844883382320404, "rewards_train/1-l": -1.870451807975769, "rewards_train/1-w": 1.8293936252593994, "rewards_train/2-2": 1.9296404123306274, "rewards_train/2-w": 0.5685981512069702, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6998454332351685, "rewards_train/margins_1": 0.9845102429389954, "rewards_train/margins_2": 1.3610422611236572, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -157.15650939941406, "logps_train/policy_1_l": -109.09248352050781, "logps_train/policy_1_w": -58.90756607055664, "logps_train/policy_2_2": -110.56977844238281, "logps_train/policy_2_w": -81.80335998535156, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 1.2905995845794678, "rewards_train/1-l": -2.5077836513519287, "rewards_train/1-w": 2.5346336364746094, "rewards_train/2-2": 3.263334274291992, "rewards_train/2-w": 1.6071642637252808, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.042417287826538, "rewards_train/margins_1": 1.2440340518951416, "rewards_train/margins_2": 1.6561700105667114, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -124.71324920654297, "logps_train/policy_1_l": -183.51092529296875, "logps_train/policy_1_w": -116.39346313476562, "logps_train/policy_2_2": -103.02432250976562, "logps_train/policy_2_w": -148.36459350585938, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.7482067346572876, "rewards_train/1-l": -2.697185754776001, "rewards_train/1-w": 3.080965757369995, "rewards_train/2-2": 2.635458469390869, "rewards_train/2-w": 1.7651031017303467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.778151512145996, "rewards_train/margins_1": 1.3327590227127075, "rewards_train/margins_2": 0.8703553676605225, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -155.5301971435547, "logps_train/policy_1_l": -109.11892700195312, "logps_train/policy_1_w": -66.17352294921875, "logps_train/policy_2_2": -110.6197738647461, "logps_train/policy_2_w": -104.03241729736328, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.45713678002357483, "rewards_train/1-l": -1.2806674242019653, "rewards_train/1-w": 2.3881163597106934, "rewards_train/2-2": 2.515366554260254, "rewards_train/2-w": 1.453789472579956, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6687837839126587, "rewards_train/margins_1": 1.9309795796871185, "rewards_train/margins_2": 1.0615770816802979, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -138.186767578125, "logps_train/policy_1_l": -112.59610748291016, "logps_train/policy_1_w": -170.06993103027344, "logps_train/policy_2_2": -113.53633117675781, "logps_train/policy_2_w": -220.56260681152344, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.9266369342803955, "rewards_train/1-l": -1.7916420698165894, "rewards_train/1-w": 4.8648810386657715, "rewards_train/2-2": 2.7838666439056396, "rewards_train/2-w": 2.5249886512756348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.656523108482361, "rewards_train/margins_1": 2.938244104385376, "rewards_train/margins_2": 0.2588779926300049, "step": 393 }, { "epoch": 1.18, "learning_rate": 1.9850534531472544e-06, "loss": 0.5174, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -207.21417236328125, "logps_train/policy_1_l": -162.45245361328125, "logps_train/policy_1_w": -99.35281372070312, "logps_train/policy_2_2": -168.7852783203125, "logps_train/policy_2_w": -125.5472183227539, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.24733304977417, "rewards_train/1-l": -1.689387321472168, "rewards_train/1-w": 3.1748743057250977, "rewards_train/2-2": 2.9765512943267822, "rewards_train/2-w": 2.377309560775757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.864261627197266, "rewards_train/margins_1": 1.9275412559509277, "rewards_train/margins_2": 0.5992417335510254, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -137.56324768066406, "logps_train/policy_1_l": -118.15089416503906, "logps_train/policy_1_w": -110.82671356201172, "logps_train/policy_2_2": -108.79545593261719, "logps_train/policy_2_w": -151.72877502441406, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.27961266040802, "rewards_train/1-l": -1.2151873111724854, "rewards_train/1-w": 2.7423288822174072, "rewards_train/2-2": 2.322798728942871, "rewards_train/2-w": 0.9255595803260803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9575161933898926, "rewards_train/margins_1": 1.4627162218093872, "rewards_train/margins_2": 1.3972391486167908, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -182.1818084716797, "logps_train/policy_1_l": -257.78631591796875, "logps_train/policy_1_w": -124.3185043334961, "logps_train/policy_2_2": -142.23260498046875, "logps_train/policy_2_w": -167.62261962890625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.608381748199463, "rewards_train/1-l": -2.609102725982666, "rewards_train/1-w": 3.7447118759155273, "rewards_train/2-2": 3.0626778602600098, "rewards_train/2-w": 1.9877382516860962, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.353814601898193, "rewards_train/margins_1": 2.1363301277160645, "rewards_train/margins_2": 1.0749396085739136, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -217.38877868652344, "logps_train/policy_1_l": -228.06155395507812, "logps_train/policy_1_w": -196.2923126220703, "logps_train/policy_2_2": -173.09262084960938, "logps_train/policy_2_w": -266.087646484375, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.660926342010498, "rewards_train/1-l": -2.5459985733032227, "rewards_train/1-w": 4.776236534118652, "rewards_train/2-2": 3.1689610481262207, "rewards_train/2-w": 2.308424234390259, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.322235107421875, "rewards_train/margins_1": 3.1153101921081543, "rewards_train/margins_2": 0.8605368137359619, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -35.07499694824219, "logps_train/policy_1_l": -49.26124572753906, "logps_train/policy_1_w": -59.43681335449219, "logps_train/policy_2_2": -21.05270767211914, "logps_train/policy_2_w": -93.29090118408203, "logps_train/ref_1_2": -38.0, "logps_train/ref_1_l": -41.75, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -30.75, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 0.2803906798362732, "rewards_train/1-l": -0.7500994205474854, "rewards_train/1-w": 1.589033603668213, "rewards_train/2-2": 0.9654325246810913, "rewards_train/2-w": 0.038097068667411804, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3391330242156982, "rewards_train/margins_1": 1.3086429238319397, "rewards_train/margins_2": 0.9273354560136795, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -86.10183715820312, "logps_train/policy_1_l": -122.32329559326172, "logps_train/policy_1_w": -93.85492706298828, "logps_train/policy_2_2": -67.85735321044922, "logps_train/policy_2_w": -116.05549621582031, "logps_train/ref_1_2": -94.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.8534882068634033, "rewards_train/1-l": -1.907524585723877, "rewards_train/1-w": 2.166851282119751, "rewards_train/2-2": 1.8323800563812256, "rewards_train/2-w": 1.32101309299469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.074375867843628, "rewards_train/margins_1": 1.3133630752563477, "rewards_train/margins_2": 0.5113669633865356, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -187.15670776367188, "logps_train/policy_1_l": -192.3430633544922, "logps_train/policy_1_w": -117.103515625, "logps_train/policy_2_2": -149.86441040039062, "logps_train/policy_2_w": -156.5057373046875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 2.131791830062866, "rewards_train/1-l": -2.0362601280212402, "rewards_train/1-w": 3.9130859375, "rewards_train/2-2": 3.4737157821655273, "rewards_train/2-w": 2.3400516510009766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.94934606552124, "rewards_train/margins_1": 1.7812941074371338, "rewards_train/margins_2": 1.1336641311645508, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -62.467933654785156, "logps_train/policy_1_l": -60.13898468017578, "logps_train/policy_1_w": -50.76410675048828, "logps_train/policy_2_2": -42.78864288330078, "logps_train/policy_2_w": -77.97059631347656, "logps_train/ref_1_2": -73.0, "logps_train/ref_1_l": -47.0, "logps_train/ref_1_w": -72.5, "logps_train/ref_2_2": -60.25, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 1.050472378730774, "rewards_train/1-l": -1.3172186613082886, "rewards_train/1-w": 2.1657772064208984, "rewards_train/2-2": 1.7476983070373535, "rewards_train/2-w": 1.1060657501220703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.482995867729187, "rewards_train/margins_1": 1.1153048276901245, "rewards_train/margins_2": 0.6416325569152832, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -222.31570434570312, "logps_train/policy_1_l": -158.03466796875, "logps_train/policy_1_w": -116.99116516113281, "logps_train/policy_2_2": -169.95285034179688, "logps_train/policy_2_w": -159.45498657226562, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.7965542078018188, "rewards_train/1-l": -1.4179198741912842, "rewards_train/1-w": 2.9641647338867188, "rewards_train/2-2": 2.8500285148620605, "rewards_train/2-w": 1.088876724243164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.382084608078003, "rewards_train/margins_1": 2.1676105260849, "rewards_train/margins_2": 1.7611517906188965, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -131.5325164794922, "logps_train/policy_1_l": -146.12997436523438, "logps_train/policy_1_w": -84.62603759765625, "logps_train/policy_2_2": -98.27310943603516, "logps_train/policy_2_w": -123.85980987548828, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.2412793636322021, "rewards_train/1-l": -1.4513757228851318, "rewards_train/1-w": 2.7455997467041016, "rewards_train/2-2": 2.4625325202941895, "rewards_train/2-w": 1.5839409828186035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.196975469589233, "rewards_train/margins_1": 1.5043203830718994, "rewards_train/margins_2": 0.8785915374755859, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -239.58071899414062, "logps_train/policy_1_l": -256.4241943359375, "logps_train/policy_1_w": -138.63955688476562, "logps_train/policy_2_2": -194.66864013671875, "logps_train/policy_2_w": -186.80328369140625, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.4653658866882324, "rewards_train/1-l": -2.597104549407959, "rewards_train/1-w": 3.151669502258301, "rewards_train/2-2": 3.619072914123535, "rewards_train/2-w": 1.7056093215942383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.74877405166626, "rewards_train/margins_1": 1.6863036155700684, "rewards_train/margins_2": 1.9134635925292969, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -142.9627685546875, "logps_train/policy_1_l": -185.66778564453125, "logps_train/policy_1_w": -99.33749389648438, "logps_train/policy_2_2": -112.37024688720703, "logps_train/policy_2_w": -130.26524353027344, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.2459089756011963, "rewards_train/1-l": -3.5253732204437256, "rewards_train/1-w": 3.931875705718994, "rewards_train/2-2": 2.6129751205444336, "rewards_train/2-w": 2.845350742340088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.45724892616272, "rewards_train/margins_1": 2.685966730117798, "rewards_train/margins_2": -0.2323756217956543, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -141.8628387451172, "logps_train/policy_1_l": -162.7513427734375, "logps_train/policy_1_w": -144.885986328125, "logps_train/policy_2_2": -118.5732192993164, "logps_train/policy_2_w": -182.45697021484375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 2.0820748805999756, "rewards_train/1-l": -2.1243534088134766, "rewards_train/1-w": 3.3442139625549316, "rewards_train/2-2": 2.8459982872009277, "rewards_train/2-w": 1.8332099914550781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.468567371368408, "rewards_train/margins_1": 1.262139081954956, "rewards_train/margins_2": 1.0127882957458496, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -143.68954467773438, "logps_train/policy_1_l": -114.63351440429688, "logps_train/policy_1_w": -94.67178344726562, "logps_train/policy_2_2": -111.01252746582031, "logps_train/policy_2_w": -123.33786010742188, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.4840738773345947, "rewards_train/1-l": -1.6097620725631714, "rewards_train/1-w": 2.8771324157714844, "rewards_train/2-2": 2.800992965698242, "rewards_train/2-w": 1.5042998790740967, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.486894488334656, "rewards_train/margins_1": 1.3930585384368896, "rewards_train/margins_2": 1.2966930866241455, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -137.70150756835938, "logps_train/policy_1_l": -155.201416015625, "logps_train/policy_1_w": -86.88917541503906, "logps_train/policy_2_2": -107.97502136230469, "logps_train/policy_2_w": -108.07567596435547, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.4048501253128052, "rewards_train/1-l": -2.191039562225342, "rewards_train/1-w": 2.3911609649658203, "rewards_train/2-2": 2.7021074295043945, "rewards_train/2-w": 1.4799330234527588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.582200527191162, "rewards_train/margins_1": 0.9863108396530151, "rewards_train/margins_2": 1.2221744060516357, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -189.60667419433594, "logps_train/policy_1_l": -148.1711883544922, "logps_train/policy_1_w": -158.48745727539062, "logps_train/policy_2_2": -139.49440002441406, "logps_train/policy_2_w": -211.32122802734375, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.7942640781402588, "rewards_train/1-l": -1.7233686447143555, "rewards_train/1-w": 2.8222012519836426, "rewards_train/2-2": 2.76931095123291, "rewards_train/2-w": 0.6034228205680847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.545569896697998, "rewards_train/margins_1": 2.027937173843384, "rewards_train/margins_2": 2.1658881306648254, "step": 395 }, { "epoch": 1.19, "learning_rate": 1.960910532708558e-06, "loss": 0.5144, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -165.239990234375, "logps_train/policy_1_l": -105.78076171875, "logps_train/policy_1_w": -108.26368713378906, "logps_train/policy_2_2": -123.67396545410156, "logps_train/policy_2_w": -148.0270233154297, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.3603752851486206, "rewards_train/1-l": -1.1841795444488525, "rewards_train/1-w": 3.2048821449279785, "rewards_train/2-2": 2.7798690795898438, "rewards_train/2-w": 1.354719638824463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.389061689376831, "rewards_train/margins_1": 1.844506859779358, "rewards_train/margins_2": 1.4251494407653809, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -275.5873718261719, "logps_train/policy_1_l": -277.29058837890625, "logps_train/policy_1_w": -136.0381622314453, "logps_train/policy_2_2": -222.9694366455078, "logps_train/policy_2_w": -187.90936279296875, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -268.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 2.0131373405456543, "rewards_train/1-l": -3.5884337425231934, "rewards_train/1-w": 3.7461843490600586, "rewards_train/2-2": 4.356180191040039, "rewards_train/2-w": 1.9090633392333984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.334618091583252, "rewards_train/margins_1": 1.7330470085144043, "rewards_train/margins_2": 2.4471168518066406, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -163.51809692382812, "logps_train/policy_1_l": -221.87771606445312, "logps_train/policy_1_w": -194.14447021484375, "logps_train/policy_2_2": -136.518310546875, "logps_train/policy_2_w": -239.46041870117188, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.7060039043426514, "rewards_train/1-l": -1.9510540962219238, "rewards_train/1-w": 3.5480520725250244, "rewards_train/2-2": 2.759106397628784, "rewards_train/2-w": 1.4914584159851074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.499106168746948, "rewards_train/margins_1": 1.842048168182373, "rewards_train/margins_2": 1.2676479816436768, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -201.30323791503906, "logps_train/policy_1_l": -193.75094604492188, "logps_train/policy_1_w": -121.4582290649414, "logps_train/policy_2_2": -173.71539306640625, "logps_train/policy_2_w": -144.1872100830078, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.300926923751831, "rewards_train/1-l": -2.3090784549713135, "rewards_train/1-w": 3.091677188873291, "rewards_train/2-2": 3.2878363132476807, "rewards_train/2-w": 2.2250289916992188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.4007556438446045, "rewards_train/margins_1": 0.79075026512146, "rewards_train/margins_2": 1.062807321548462, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -108.64573669433594, "logps_train/policy_1_l": -99.94335174560547, "logps_train/policy_1_w": -110.90361785888672, "logps_train/policy_2_2": -95.81631469726562, "logps_train/policy_2_w": -128.41224670410156, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.4823012351989746, "rewards_train/1-l": -1.599022626876831, "rewards_train/1-w": 1.9649598598480225, "rewards_train/2-2": 2.022274971008301, "rewards_train/2-w": 1.4142441749572754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5639824867248535, "rewards_train/margins_1": 0.48265862464904785, "rewards_train/margins_2": 0.6080307960510254, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -79.05406188964844, "logps_train/policy_1_l": -112.78855895996094, "logps_train/policy_1_w": -29.423830032348633, "logps_train/policy_2_2": -53.61498260498047, "logps_train/policy_2_w": -45.80491638183594, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -50.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -60.5, "rewards_train/1-2": 0.8363906145095825, "rewards_train/1-l": -2.5214338302612305, "rewards_train/1-w": 2.1201171875, "rewards_train/2-2": 2.0556893348693848, "rewards_train/2-w": 1.4679456949234009, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.6415510177612305, "rewards_train/margins_1": 1.2837265729904175, "rewards_train/margins_2": 0.5877436399459839, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -175.00677490234375, "logps_train/policy_1_l": -214.34432983398438, "logps_train/policy_1_w": -125.84857940673828, "logps_train/policy_2_2": -140.50726318359375, "logps_train/policy_2_w": -152.5933380126953, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.9993230104446411, "rewards_train/1-l": -2.9719319343566895, "rewards_train/1-w": 3.1745171546936035, "rewards_train/2-2": 3.2867746353149414, "rewards_train/2-w": 2.443790912628174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.146449089050293, "rewards_train/margins_1": 1.1751941442489624, "rewards_train/margins_2": 0.8429837226867676, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -97.12886047363281, "logps_train/policy_1_l": -160.12574768066406, "logps_train/policy_1_w": -83.92117309570312, "logps_train/policy_2_2": -73.26258850097656, "logps_train/policy_2_w": -112.27262878417969, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.4073045253753662, "rewards_train/1-l": -1.9212898015975952, "rewards_train/1-w": 2.584322452545166, "rewards_train/2-2": 1.9633653163909912, "rewards_train/2-w": 1.3748610019683838, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.505612254142761, "rewards_train/margins_1": 1.1770179271697998, "rewards_train/margins_2": 0.5885043144226074, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -112.04586791992188, "logps_train/policy_1_l": -154.77178955078125, "logps_train/policy_1_w": -106.51460266113281, "logps_train/policy_2_2": -85.01435852050781, "logps_train/policy_2_w": -137.2323760986328, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.624319314956665, "rewards_train/1-l": -2.105010509490967, "rewards_train/1-w": 3.1610398292541504, "rewards_train/2-2": 2.9657516479492188, "rewards_train/2-w": 1.3080124855041504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.266050338745117, "rewards_train/margins_1": 1.5367205142974854, "rewards_train/margins_2": 1.6577391624450684, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -159.80841064453125, "logps_train/policy_1_l": -104.2806625366211, "logps_train/policy_1_w": -115.93505859375, "logps_train/policy_2_2": -126.29121398925781, "logps_train/policy_2_w": -151.8816375732422, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.6476750373840332, "rewards_train/1-l": -1.4151756763458252, "rewards_train/1-w": 3.284618616104126, "rewards_train/2-2": 3.126347780227661, "rewards_train/2-w": 1.9368358850479126, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.699794292449951, "rewards_train/margins_1": 1.6369435787200928, "rewards_train/margins_2": 1.1895118951797485, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -211.72225952148438, "logps_train/policy_1_l": -186.9496612548828, "logps_train/policy_1_w": -123.9532699584961, "logps_train/policy_2_2": -155.13973999023438, "logps_train/policy_2_w": -177.16848754882812, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.9137113094329834, "rewards_train/1-l": -2.5426218509674072, "rewards_train/1-w": 3.4551610946655273, "rewards_train/2-2": 3.7657127380371094, "rewards_train/2-w": 1.2640111446380615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.997782945632935, "rewards_train/margins_1": 1.541449785232544, "rewards_train/margins_2": 2.501701593399048, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -294.9017028808594, "logps_train/policy_1_l": -226.56600952148438, "logps_train/policy_1_w": -164.14427185058594, "logps_train/policy_2_2": -220.0930633544922, "logps_train/policy_2_w": -230.2757568359375, "logps_train/ref_1_2": -312.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -268.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": 1.7973315715789795, "rewards_train/1-l": -3.031601905822754, "rewards_train/1-w": 5.041822910308838, "rewards_train/2-2": 4.687568664550781, "rewards_train/2-w": 2.49117374420166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.073424816131592, "rewards_train/margins_1": 3.2444913387298584, "rewards_train/margins_2": 2.196394920349121, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -127.23461151123047, "logps_train/policy_1_l": -171.21405029296875, "logps_train/policy_1_w": -69.17440795898438, "logps_train/policy_2_2": -98.39418029785156, "logps_train/policy_2_w": -88.94629669189453, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": 1.042945384979248, "rewards_train/1-l": -2.479998826980591, "rewards_train/1-w": 2.7931058406829834, "rewards_train/2-2": 1.6734724044799805, "rewards_train/2-w": 1.955370306968689, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.273104667663574, "rewards_train/margins_1": 1.7501604557037354, "rewards_train/margins_2": -0.2818979024887085, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -171.1965789794922, "logps_train/policy_1_l": -178.24737548828125, "logps_train/policy_1_w": -112.74813079833984, "logps_train/policy_2_2": -132.55816650390625, "logps_train/policy_2_w": -165.3050994873047, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.2623730897903442, "rewards_train/1-l": -3.0569634437561035, "rewards_train/1-w": 3.1621010303497314, "rewards_train/2-2": 3.1787521839141846, "rewards_train/2-w": 1.267146110534668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.219064474105835, "rewards_train/margins_1": 1.8997279405593872, "rewards_train/margins_2": 1.9116060733795166, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -152.15109252929688, "logps_train/policy_1_l": -161.1944122314453, "logps_train/policy_1_w": -107.13179016113281, "logps_train/policy_2_2": -104.79719543457031, "logps_train/policy_2_w": -156.6257781982422, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.2216107845306396, "rewards_train/1-l": -1.9223713874816895, "rewards_train/1-w": 3.5821337699890137, "rewards_train/2-2": 3.227311849594116, "rewards_train/2-w": 1.5530472993850708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.504505157470703, "rewards_train/margins_1": 2.360522985458374, "rewards_train/margins_2": 1.6742645502090454, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -79.65531921386719, "logps_train/policy_1_l": -91.13435363769531, "logps_train/policy_1_w": -82.9044189453125, "logps_train/policy_2_2": -65.28751373291016, "logps_train/policy_2_w": -107.74885559082031, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.5524369478225708, "rewards_train/1-l": -1.837653636932373, "rewards_train/1-w": 2.4822142124176025, "rewards_train/2-2": 2.282186269760132, "rewards_train/2-w": 1.3493330478668213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.319867849349976, "rewards_train/margins_1": 0.9297772645950317, "rewards_train/margins_2": 0.9328532218933105, "step": 397 }, { "epoch": 1.19, "learning_rate": 1.9368202264515127e-06, "loss": 0.3902, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -112.62613677978516, "logps_train/policy_1_l": -108.62994384765625, "logps_train/policy_1_w": -94.80138397216797, "logps_train/policy_2_2": -87.64839172363281, "logps_train/policy_2_w": -130.65972900390625, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.5686362981796265, "rewards_train/1-l": -1.9169005155563354, "rewards_train/1-w": 3.0761117935180664, "rewards_train/2-2": 2.4460980892181396, "rewards_train/2-w": 1.7105909585952759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.993012309074402, "rewards_train/margins_1": 1.50747549533844, "rewards_train/margins_2": 0.7355071306228638, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -155.26409912109375, "logps_train/policy_1_l": -128.84805297851562, "logps_train/policy_1_w": -82.81365203857422, "logps_train/policy_2_2": -126.68035888671875, "logps_train/policy_2_w": -100.9203872680664, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.0517140626907349, "rewards_train/1-l": -1.7427160739898682, "rewards_train/1-w": 2.260822296142578, "rewards_train/2-2": 2.4929022789001465, "rewards_train/2-w": 1.3368675708770752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.003538370132446, "rewards_train/margins_1": 1.2091082334518433, "rewards_train/margins_2": 1.1560347080230713, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -179.00320434570312, "logps_train/policy_1_l": -165.34556579589844, "logps_train/policy_1_w": -106.4652099609375, "logps_train/policy_2_2": -137.12855529785156, "logps_train/policy_2_w": -151.8397216796875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.102805495262146, "rewards_train/1-l": -3.2017438411712646, "rewards_train/1-w": 4.928478240966797, "rewards_train/2-2": 3.1637063026428223, "rewards_train/2-w": 3.025402069091797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 8.130222082138062, "rewards_train/margins_1": 3.825672745704651, "rewards_train/margins_2": 0.1383042335510254, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -115.09754180908203, "logps_train/policy_1_l": -85.25336456298828, "logps_train/policy_1_w": -53.56264877319336, "logps_train/policy_2_2": -86.54215240478516, "logps_train/policy_2_w": -83.50984954833984, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.9308710098266602, "rewards_train/1-l": -1.803851842880249, "rewards_train/1-w": 2.6577978134155273, "rewards_train/2-2": 2.141878128051758, "rewards_train/2-w": 1.4771395921707153, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.461649656295776, "rewards_train/margins_1": 1.7269268035888672, "rewards_train/margins_2": 0.6647385358810425, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -256.9345703125, "logps_train/policy_1_l": -236.7863006591797, "logps_train/policy_1_w": -149.09451293945312, "logps_train/policy_2_2": -196.97976684570312, "logps_train/policy_2_w": -215.62716674804688, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.8502930402755737, "rewards_train/1-l": -2.292692184448242, "rewards_train/1-w": 3.7780489921569824, "rewards_train/2-2": 4.077022552490234, "rewards_train/2-w": 1.7997828722000122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.070741176605225, "rewards_train/margins_1": 1.9277559518814087, "rewards_train/margins_2": 2.277239680290222, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -140.05349731445312, "logps_train/policy_1_l": -184.52322387695312, "logps_train/policy_1_w": -62.791988372802734, "logps_train/policy_2_2": -117.220703125, "logps_train/policy_2_w": -84.56968688964844, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.1610567569732666, "rewards_train/1-l": -3.058377265930176, "rewards_train/1-w": 2.3895514011383057, "rewards_train/2-2": 2.2091798782348633, "rewards_train/2-w": 1.5953751802444458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.4479286670684814, "rewards_train/margins_1": 1.228494644165039, "rewards_train/margins_2": 0.6138046979904175, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -134.71458435058594, "logps_train/policy_1_l": -170.4837646484375, "logps_train/policy_1_w": -108.034912109375, "logps_train/policy_2_2": -103.1854476928711, "logps_train/policy_2_w": -149.7601776123047, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.8097917437553406, "rewards_train/1-l": -2.142517566680908, "rewards_train/1-w": 2.6027584075927734, "rewards_train/2-2": 2.0658297538757324, "rewards_train/2-w": 1.0896070003509521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.745275974273682, "rewards_train/margins_1": 1.7929666638374329, "rewards_train/margins_2": 0.9762227535247803, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -177.38568115234375, "logps_train/policy_1_l": -172.94168090820312, "logps_train/policy_1_w": -126.00042724609375, "logps_train/policy_2_2": -147.1048126220703, "logps_train/policy_2_w": -164.65853881835938, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.9676823616027832, "rewards_train/1-l": -2.698270320892334, "rewards_train/1-w": 3.776519298553467, "rewards_train/2-2": 3.1238937377929688, "rewards_train/2-w": 2.0630533695220947, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.474789619445801, "rewards_train/margins_1": 1.8088369369506836, "rewards_train/margins_2": 1.060840368270874, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -171.04440307617188, "logps_train/policy_1_l": -235.2669677734375, "logps_train/policy_1_w": -183.94244384765625, "logps_train/policy_2_2": -129.0106964111328, "logps_train/policy_2_w": -245.20779418945312, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.5049360990524292, "rewards_train/1-l": -3.2829456329345703, "rewards_train/1-w": 4.04716157913208, "rewards_train/2-2": 2.7926807403564453, "rewards_train/2-w": 1.3089076280593872, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.33010721206665, "rewards_train/margins_1": 2.542225480079651, "rewards_train/margins_2": 1.483773112297058, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -92.26255798339844, "logps_train/policy_1_l": -131.1536407470703, "logps_train/policy_1_w": -86.11918640136719, "logps_train/policy_2_2": -67.32344055175781, "logps_train/policy_2_w": -120.76799011230469, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.92052161693573, "rewards_train/1-l": -2.3528637886047363, "rewards_train/1-w": 2.9473586082458496, "rewards_train/2-2": 1.821659803390503, "rewards_train/2-w": 1.3397047519683838, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.300222396850586, "rewards_train/margins_1": 2.0268369913101196, "rewards_train/margins_2": 0.48195505142211914, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -188.96005249023438, "logps_train/policy_1_l": -211.85350036621094, "logps_train/policy_1_w": -167.72865295410156, "logps_train/policy_2_2": -157.44334411621094, "logps_train/policy_2_w": -201.9736328125, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 3.0508711338043213, "rewards_train/1-l": -2.500584602355957, "rewards_train/1-w": 4.350279331207275, "rewards_train/2-2": 4.257228851318359, "rewards_train/2-w": 2.8096675872802734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.850863933563232, "rewards_train/margins_1": 1.299408197402954, "rewards_train/margins_2": 1.447561264038086, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -141.80572509765625, "logps_train/policy_1_l": -215.63275146484375, "logps_train/policy_1_w": -118.77837371826172, "logps_train/policy_2_2": -100.54971313476562, "logps_train/policy_2_w": -168.80618286132812, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.1252880096435547, "rewards_train/1-l": -3.9737255573272705, "rewards_train/1-w": 2.9815378189086914, "rewards_train/2-2": 2.359091281890869, "rewards_train/2-w": 0.9834449887275696, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.955263376235962, "rewards_train/margins_1": 1.8562498092651367, "rewards_train/margins_2": 1.3756462931632996, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -184.61380004882812, "logps_train/policy_1_l": -205.236083984375, "logps_train/policy_1_w": -140.1802978515625, "logps_train/policy_2_2": -144.77243041992188, "logps_train/policy_2_w": -185.526611328125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.7479945421218872, "rewards_train/1-l": -1.8681399822235107, "rewards_train/1-w": 3.4475951194763184, "rewards_train/2-2": 3.272756814956665, "rewards_train/2-w": 1.7567148208618164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.315735101699829, "rewards_train/margins_1": 1.6996005773544312, "rewards_train/margins_2": 1.5160419940948486, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -192.28036499023438, "logps_train/policy_1_l": -123.93209838867188, "logps_train/policy_1_w": -135.50119018554688, "logps_train/policy_2_2": -170.16123962402344, "logps_train/policy_2_w": -168.13511657714844, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.6391515731811523, "rewards_train/1-l": -1.4830536842346191, "rewards_train/1-w": 3.612380266189575, "rewards_train/2-2": 2.5854382514953613, "rewards_train/2-w": 2.095862865447998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.095433950424194, "rewards_train/margins_1": 1.9732286930084229, "rewards_train/margins_2": 0.4895753860473633, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -172.39962768554688, "logps_train/policy_1_l": -108.93246459960938, "logps_train/policy_1_w": -123.92276000976562, "logps_train/policy_2_2": -128.0943603515625, "logps_train/policy_2_w": -161.42723083496094, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.219802975654602, "rewards_train/1-l": -0.891097903251648, "rewards_train/1-w": 2.5756921768188477, "rewards_train/2-2": 3.1229851245880127, "rewards_train/2-w": 1.1861841678619385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4667900800704956, "rewards_train/margins_1": 1.3558892011642456, "rewards_train/margins_2": 1.9368009567260742, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -219.3090362548828, "logps_train/policy_1_l": -345.85064697265625, "logps_train/policy_1_w": -215.40359497070312, "logps_train/policy_2_2": -170.83453369140625, "logps_train/policy_2_w": -279.6256103515625, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -298.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -292.0, "rewards_train/1-2": 2.295659065246582, "rewards_train/1-l": -4.797562122344971, "rewards_train/1-w": 3.3783907890319824, "rewards_train/2-2": 4.2095160484313965, "rewards_train/2-w": 1.248375654220581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.175952911376953, "rewards_train/margins_1": 1.0827317237854004, "rewards_train/margins_2": 2.9611403942108154, "step": 399 }, { "epoch": 1.2, "learning_rate": 1.912784885547541e-06, "loss": 0.4326, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -127.83905792236328, "logps_train/policy_1_l": -183.231689453125, "logps_train/policy_1_w": -81.1239242553711, "logps_train/policy_2_2": -99.31477355957031, "logps_train/policy_2_w": -120.15444946289062, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.253204107284546, "rewards_train/1-l": -3.2028560638427734, "rewards_train/1-w": 3.3305764198303223, "rewards_train/2-2": 2.3033366203308105, "rewards_train/2-w": 1.9697118997573853, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.533432483673096, "rewards_train/margins_1": 2.0773723125457764, "rewards_train/margins_2": 0.3336247205734253, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -125.09384155273438, "logps_train/policy_1_l": -141.77349853515625, "logps_train/policy_1_w": -80.59821319580078, "logps_train/policy_2_2": -96.50926971435547, "logps_train/policy_2_w": -103.02265930175781, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 1.5804591178894043, "rewards_train/1-l": -2.0187571048736572, "rewards_train/1-w": 3.002678871154785, "rewards_train/2-2": 2.537353992462158, "rewards_train/2-w": 1.847733736038208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.021435976028442, "rewards_train/margins_1": 1.4222197532653809, "rewards_train/margins_2": 0.6896202564239502, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -61.06365203857422, "logps_train/policy_1_l": -136.6040496826172, "logps_train/policy_1_w": -154.4970245361328, "logps_train/policy_2_2": -52.13838195800781, "logps_train/policy_2_w": -187.7745361328125, "logps_train/ref_1_2": -77.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -71.5, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.6274242401123047, "rewards_train/1-l": -2.2445855140686035, "rewards_train/1-w": 2.5190470218658447, "rewards_train/2-2": 1.9281537532806396, "rewards_train/2-w": 1.194421410560608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.763632535934448, "rewards_train/margins_1": 0.89162278175354, "rewards_train/margins_2": 0.7337323427200317, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -186.26466369628906, "logps_train/policy_1_l": -248.52264404296875, "logps_train/policy_1_w": -151.42286682128906, "logps_train/policy_2_2": -148.54855346679688, "logps_train/policy_2_w": -193.474853515625, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.3461899757385254, "rewards_train/1-l": -3.8366751670837402, "rewards_train/1-w": 3.3163070678710938, "rewards_train/2-2": 3.637331962585449, "rewards_train/2-w": 1.8181400299072266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.152982234954834, "rewards_train/margins_1": 0.9701170921325684, "rewards_train/margins_2": 1.8191919326782227, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -236.71502685546875, "logps_train/policy_1_l": -276.2616271972656, "logps_train/policy_1_w": -210.13571166992188, "logps_train/policy_2_2": -201.34197998046875, "logps_train/policy_2_w": -250.10255432128906, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -246.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.4222474098205566, "rewards_train/1-l": -2.2362213134765625, "rewards_train/1-w": 3.563870429992676, "rewards_train/2-2": 3.86580228805542, "rewards_train/2-w": 2.2413082122802734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.800091743469238, "rewards_train/margins_1": 1.1416230201721191, "rewards_train/margins_2": 1.6244940757751465, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -158.1081085205078, "logps_train/policy_1_l": -125.62957000732422, "logps_train/policy_1_w": -116.6994400024414, "logps_train/policy_2_2": -127.03707885742188, "logps_train/policy_2_w": -166.46969604492188, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.4149706363677979, "rewards_train/1-l": -1.979948878288269, "rewards_train/1-w": 3.4331815242767334, "rewards_train/2-2": 2.858793258666992, "rewards_train/2-w": 0.9131869077682495, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.413130402565002, "rewards_train/margins_1": 2.0182108879089355, "rewards_train/margins_2": 1.9456063508987427, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -227.4790496826172, "logps_train/policy_1_l": -233.2775421142578, "logps_train/policy_1_w": -183.405029296875, "logps_train/policy_2_2": -185.5625, "logps_train/policy_2_w": -232.04525756835938, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.9813917875289917, "rewards_train/1-l": -2.6797080039978027, "rewards_train/1-w": 3.909497022628784, "rewards_train/2-2": 3.7558586597442627, "rewards_train/2-w": 2.0017244815826416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.589205026626587, "rewards_train/margins_1": 1.9281052350997925, "rewards_train/margins_2": 1.754134178161621, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -145.25827026367188, "logps_train/policy_1_l": -192.82754516601562, "logps_train/policy_1_w": -152.57827758789062, "logps_train/policy_2_2": -119.78694152832031, "logps_train/policy_2_w": -177.99053955078125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 2.4616734981536865, "rewards_train/1-l": -2.7018961906433105, "rewards_train/1-w": 3.303499698638916, "rewards_train/2-2": 3.240838050842285, "rewards_train/2-w": 1.8978221416473389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.005395889282227, "rewards_train/margins_1": 0.8418262004852295, "rewards_train/margins_2": 1.3430159091949463, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -136.83889770507812, "logps_train/policy_1_l": -210.24530029296875, "logps_train/policy_1_w": -150.30743408203125, "logps_train/policy_2_2": -108.05856323242188, "logps_train/policy_2_w": -192.4282989501953, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.0567357540130615, "rewards_train/1-l": -2.9089057445526123, "rewards_train/1-w": 4.041130542755127, "rewards_train/2-2": 3.0753936767578125, "rewards_train/2-w": 2.3915441036224365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.950036287307739, "rewards_train/margins_1": 1.9843947887420654, "rewards_train/margins_2": 0.683849573135376, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -201.32745361328125, "logps_train/policy_1_l": -284.97418212890625, "logps_train/policy_1_w": -160.55856323242188, "logps_train/policy_2_2": -165.95941162109375, "logps_train/policy_2_w": -196.74154663085938, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 2.1985039710998535, "rewards_train/1-l": -2.3177318572998047, "rewards_train/1-w": 3.96445631980896, "rewards_train/2-2": 3.329059362411499, "rewards_train/2-w": 2.735220193862915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.282188177108765, "rewards_train/margins_1": 1.7659523487091064, "rewards_train/margins_2": 0.593839168548584, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -223.5067138671875, "logps_train/policy_1_l": -194.90408325195312, "logps_train/policy_1_w": -179.8560791015625, "logps_train/policy_2_2": -174.41860961914062, "logps_train/policy_2_w": -227.05227661132812, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.7352652549743652, "rewards_train/1-l": -2.1888465881347656, "rewards_train/1-w": 3.867516040802002, "rewards_train/2-2": 3.72454571723938, "rewards_train/2-w": 1.8510210514068604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.056362628936768, "rewards_train/margins_1": 2.1322507858276367, "rewards_train/margins_2": 1.8735246658325195, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -121.17372131347656, "logps_train/policy_1_l": -103.41529846191406, "logps_train/policy_1_w": -119.13229370117188, "logps_train/policy_2_2": -90.40750122070312, "logps_train/policy_2_w": -156.47564697265625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.2545034885406494, "rewards_train/1-l": -2.0430922508239746, "rewards_train/1-w": 4.899270534515381, "rewards_train/2-2": 2.806124687194824, "rewards_train/2-w": 3.17665433883667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.9423627853393555, "rewards_train/margins_1": 3.6447670459747314, "rewards_train/margins_2": -0.3705296516418457, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -122.8368911743164, "logps_train/policy_1_l": -113.08030700683594, "logps_train/policy_1_w": -63.27170944213867, "logps_train/policy_2_2": -92.41748046875, "logps_train/policy_2_w": -85.15604400634766, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.9233425855636597, "rewards_train/1-l": -2.386155843734741, "rewards_train/1-w": 2.096266746520996, "rewards_train/2-2": 2.2738771438598633, "rewards_train/2-w": 1.2718955278396606, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.482422590255737, "rewards_train/margins_1": 1.1729241609573364, "rewards_train/margins_2": 1.0019816160202026, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -83.28927612304688, "logps_train/policy_1_l": -131.46932983398438, "logps_train/policy_1_w": -78.20249938964844, "logps_train/policy_2_2": -63.45428466796875, "logps_train/policy_2_w": -106.56990051269531, "logps_train/ref_1_2": -93.5, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.017166018486023, "rewards_train/1-l": -2.4233970642089844, "rewards_train/1-w": 2.579066038131714, "rewards_train/2-2": 1.781524896621704, "rewards_train/2-w": 1.3891041278839111, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.002463102340698, "rewards_train/margins_1": 1.561900019645691, "rewards_train/margins_2": 0.39242076873779297, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -94.07073974609375, "logps_train/policy_1_l": -98.78805541992188, "logps_train/policy_1_w": -61.4589729309082, "logps_train/policy_2_2": -70.06034088134766, "logps_train/policy_2_w": -79.70765686035156, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.9058169722557068, "rewards_train/1-l": -1.494747281074524, "rewards_train/1-w": 2.153589963912964, "rewards_train/2-2": 1.9752163887023926, "rewards_train/2-w": 1.3557968139648438, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.648337244987488, "rewards_train/margins_1": 1.247772991657257, "rewards_train/margins_2": 0.6194195747375488, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -161.8111572265625, "logps_train/policy_1_l": -236.17630004882812, "logps_train/policy_1_w": -156.76263427734375, "logps_train/policy_2_2": -129.9375762939453, "logps_train/policy_2_w": -191.2764129638672, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.9407601356506348, "rewards_train/1-l": -2.091848373413086, "rewards_train/1-w": 3.4995176792144775, "rewards_train/2-2": 3.2124927043914795, "rewards_train/2-w": 1.985640048980713, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.5913660526275635, "rewards_train/margins_1": 1.5587575435638428, "rewards_train/margins_2": 1.2268526554107666, "step": 401 }, { "epoch": 1.2, "learning_rate": 1.8888068558035435e-06, "loss": 0.4545, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -309.7398376464844, "logps_train/policy_1_l": -221.14483642578125, "logps_train/policy_1_w": -229.0938720703125, "logps_train/policy_2_2": -251.9925537109375, "logps_train/policy_2_w": -300.18060302734375, "logps_train/ref_1_2": -336.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -304.0, "logps_train/ref_2_w": -322.0, "rewards_train/1-2": 2.544766426086426, "rewards_train/1-l": -2.2957334518432617, "rewards_train/1-w": 4.89686393737793, "rewards_train/2-2": 5.160120487213135, "rewards_train/2-w": 2.063188314437866, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.192597389221191, "rewards_train/margins_1": 2.352097511291504, "rewards_train/margins_2": 3.0969321727752686, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -181.46212768554688, "logps_train/policy_1_l": -170.020263671875, "logps_train/policy_1_w": -95.64839172363281, "logps_train/policy_2_2": -141.89772033691406, "logps_train/policy_2_w": -131.08380126953125, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.3006614446640015, "rewards_train/1-l": -3.3660888671875, "rewards_train/1-w": 3.2445356845855713, "rewards_train/2-2": 2.966477870941162, "rewards_train/2-w": 2.083806037902832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.610624551773071, "rewards_train/margins_1": 1.9438742399215698, "rewards_train/margins_2": 0.8826718330383301, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -251.29513549804688, "logps_train/policy_1_l": -190.8314971923828, "logps_train/policy_1_w": -142.79901123046875, "logps_train/policy_2_2": -191.46139526367188, "logps_train/policy_2_w": -199.70114135742188, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.8376740217208862, "rewards_train/1-l": -1.5483835935592651, "rewards_train/1-w": 3.103302001953125, "rewards_train/2-2": 3.4511260986328125, "rewards_train/2-w": 1.1091835498809814, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.65168559551239, "rewards_train/margins_1": 2.2656279802322388, "rewards_train/margins_2": 2.341942548751831, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -143.4882354736328, "logps_train/policy_1_l": -174.25650024414062, "logps_train/policy_1_w": -116.20455169677734, "logps_train/policy_2_2": -114.62811279296875, "logps_train/policy_2_w": -147.3706512451172, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.9597706198692322, "rewards_train/1-l": -2.5262362957000732, "rewards_train/1-w": 1.9002479314804077, "rewards_train/2-2": 1.90086030960083, "rewards_train/2-w": 0.8574658036231995, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.426484227180481, "rewards_train/margins_1": 0.9404773116111755, "rewards_train/margins_2": 1.0433945059776306, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -178.98922729492188, "logps_train/policy_1_l": -141.009521484375, "logps_train/policy_1_w": -120.86067199707031, "logps_train/policy_2_2": -148.69117736816406, "logps_train/policy_2_w": -154.44876098632812, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.7596715688705444, "rewards_train/1-l": -2.02321720123291, "rewards_train/1-w": 2.8877615928649902, "rewards_train/2-2": 3.1402573585510254, "rewards_train/2-w": 1.6090284585952759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9109787940979, "rewards_train/margins_1": 1.1280900239944458, "rewards_train/margins_2": 1.5312288999557495, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -76.13883972167969, "logps_train/policy_1_l": -166.95266723632812, "logps_train/policy_1_w": -88.83602905273438, "logps_train/policy_2_2": -63.282203674316406, "logps_train/policy_2_w": -111.70463562011719, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.562678575515747, "rewards_train/1-l": -2.797610282897949, "rewards_train/1-w": 2.7490153312683105, "rewards_train/2-2": 1.9827170372009277, "rewards_train/2-w": 1.5394971370697021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.54662561416626, "rewards_train/margins_1": 1.1863367557525635, "rewards_train/margins_2": 0.4432199001312256, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -170.912353515625, "logps_train/policy_1_l": -252.7242889404297, "logps_train/policy_1_w": -170.54763793945312, "logps_train/policy_2_2": -131.94004821777344, "logps_train/policy_2_w": -224.9598388671875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.4650137424468994, "rewards_train/1-l": -2.8411779403686523, "rewards_train/1-w": 3.388204336166382, "rewards_train/2-2": 2.724745035171509, "rewards_train/2-w": 1.3626092672348022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.229382276535034, "rewards_train/margins_1": 1.9231905937194824, "rewards_train/margins_2": 1.3621357679367065, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -224.97909545898438, "logps_train/policy_1_l": -256.31011962890625, "logps_train/policy_1_w": -190.8563232421875, "logps_train/policy_2_2": -177.21859741210938, "logps_train/policy_2_w": -238.96832275390625, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.4587299823760986, "rewards_train/1-l": -2.987651824951172, "rewards_train/1-w": 3.0338993072509766, "rewards_train/2-2": 3.8745274543762207, "rewards_train/2-w": 0.8867623805999756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.021551132202148, "rewards_train/margins_1": 0.5751693248748779, "rewards_train/margins_2": 2.987765073776245, "step": 402 }, { "epoch": 1.21, "logps_train/policy_1_2": -115.4830093383789, "logps_train/policy_1_l": -85.72853088378906, "logps_train/policy_1_w": -46.1910285949707, "logps_train/policy_2_2": -79.79754638671875, "logps_train/policy_2_w": -69.44877624511719, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -70.5, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -79.5, "rewards_train/1-2": 0.6876366138458252, "rewards_train/1-l": -1.528322458267212, "rewards_train/1-w": 1.969178318977356, "rewards_train/2-2": 2.1680967807769775, "rewards_train/2-w": 1.0091261863708496, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.497500777244568, "rewards_train/margins_1": 1.2815417051315308, "rewards_train/margins_2": 1.158970594406128, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -124.07575225830078, "logps_train/policy_1_l": -136.65164184570312, "logps_train/policy_1_w": -113.19728088378906, "logps_train/policy_2_2": -102.86676788330078, "logps_train/policy_2_w": -147.10479736328125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 2.0721120834350586, "rewards_train/1-l": -1.6073517799377441, "rewards_train/1-w": 3.177147388458252, "rewards_train/2-2": 2.7398862838745117, "rewards_train/2-w": 1.5785822868347168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.784499168395996, "rewards_train/margins_1": 1.1050353050231934, "rewards_train/margins_2": 1.161303997039795, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -93.56414794921875, "logps_train/policy_1_l": -117.09577941894531, "logps_train/policy_1_w": -85.29853820800781, "logps_train/policy_2_2": -62.22797775268555, "logps_train/policy_2_w": -129.84483337402344, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.738116979598999, "rewards_train/1-l": -1.4096267223358154, "rewards_train/1-w": 2.6217081546783447, "rewards_train/2-2": 1.9236867427825928, "rewards_train/2-w": 0.636610746383667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.03133487701416, "rewards_train/margins_1": 1.8835911750793457, "rewards_train/margins_2": 1.2870759963989258, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -182.46453857421875, "logps_train/policy_1_l": -208.28131103515625, "logps_train/policy_1_w": -142.87547302246094, "logps_train/policy_2_2": -156.37159729003906, "logps_train/policy_2_w": -176.44662475585938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.24104642868042, "rewards_train/1-l": -2.1312570571899414, "rewards_train/1-w": 3.2077651023864746, "rewards_train/2-2": 3.4737777709960938, "rewards_train/2-w": 1.7678380012512207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.339022159576416, "rewards_train/margins_1": 0.9667186737060547, "rewards_train/margins_2": 1.705939769744873, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -156.2481689453125, "logps_train/policy_1_l": -189.7913818359375, "logps_train/policy_1_w": -125.26467895507812, "logps_train/policy_2_2": -109.86038208007812, "logps_train/policy_2_w": -178.72119140625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.408776879310608, "rewards_train/1-l": -2.53695011138916, "rewards_train/1-w": 3.1882781982421875, "rewards_train/2-2": 3.05419659614563, "rewards_train/2-w": 1.0774903297424316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.725228309631348, "rewards_train/margins_1": 1.7795013189315796, "rewards_train/margins_2": 1.9767062664031982, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -132.16900634765625, "logps_train/policy_1_l": -174.94412231445312, "logps_train/policy_1_w": -112.26122283935547, "logps_train/policy_2_2": -92.5712890625, "logps_train/policy_2_w": -144.85214233398438, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.3440371751785278, "rewards_train/1-l": -2.813943862915039, "rewards_train/1-w": 2.53598690032959, "rewards_train/2-2": 2.3944332599639893, "rewards_train/2-w": 0.8874411582946777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.349930763244629, "rewards_train/margins_1": 1.191949725151062, "rewards_train/margins_2": 1.5069921016693115, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -117.62283325195312, "logps_train/policy_1_l": -152.05426025390625, "logps_train/policy_1_w": -108.02178955078125, "logps_train/policy_2_2": -92.73771667480469, "logps_train/policy_2_w": -151.12631225585938, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.4111539125442505, "rewards_train/1-l": -1.9280822277069092, "rewards_train/1-w": 2.965007781982422, "rewards_train/2-2": 2.3347244262695312, "rewards_train/2-w": 1.4005520343780518, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.893090009689331, "rewards_train/margins_1": 1.5538538694381714, "rewards_train/margins_2": 0.9341723918914795, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -126.65274047851562, "logps_train/policy_1_l": -198.0445556640625, "logps_train/policy_1_w": -52.77232360839844, "logps_train/policy_2_2": -96.204345703125, "logps_train/policy_2_w": -71.0066146850586, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 1.4151947498321533, "rewards_train/1-l": -2.627502679824829, "rewards_train/1-w": 2.268080234527588, "rewards_train/2-2": 2.663158893585205, "rewards_train/2-w": 1.7165262699127197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.895582914352417, "rewards_train/margins_1": 0.8528854846954346, "rewards_train/margins_2": 0.9466326236724854, "step": 403 }, { "epoch": 1.21, "learning_rate": 1.8648884774329526e-06, "loss": 0.4309, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -162.8629150390625, "logps_train/policy_1_l": -191.92117309570312, "logps_train/policy_1_w": -128.2936248779297, "logps_train/policy_2_2": -117.69851684570312, "logps_train/policy_2_w": -189.75741577148438, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.9160518646240234, "rewards_train/1-l": -2.6373322010040283, "rewards_train/1-w": 3.108919143676758, "rewards_train/2-2": 2.5469443798065186, "rewards_train/2-w": 1.1144940853118896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.746251344680786, "rewards_train/margins_1": 2.1928672790527344, "rewards_train/margins_2": 1.432450294494629, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -155.7219696044922, "logps_train/policy_1_l": -247.7228240966797, "logps_train/policy_1_w": -232.47222900390625, "logps_train/policy_2_2": -120.84740447998047, "logps_train/policy_2_w": -283.4233703613281, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 1.834052562713623, "rewards_train/1-l": -2.2804861068725586, "rewards_train/1-w": 4.574653625488281, "rewards_train/2-2": 2.832447052001953, "rewards_train/2-w": 2.092038154602051, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.85513973236084, "rewards_train/margins_1": 2.740601062774658, "rewards_train/margins_2": 0.7404088973999023, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -146.2894744873047, "logps_train/policy_1_l": -213.32168579101562, "logps_train/policy_1_w": -106.45211791992188, "logps_train/policy_2_2": -115.55722045898438, "logps_train/policy_2_w": -153.41490173339844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.340193271636963, "rewards_train/1-l": -3.5202550888061523, "rewards_train/1-w": 4.121976375579834, "rewards_train/2-2": 2.524747371673584, "rewards_train/2-w": 2.400697708129883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.642231464385986, "rewards_train/margins_1": 2.781783103942871, "rewards_train/margins_2": 0.12404966354370117, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -235.47007751464844, "logps_train/policy_1_l": -280.0836181640625, "logps_train/policy_1_w": -226.82220458984375, "logps_train/policy_2_2": -187.5015869140625, "logps_train/policy_2_w": -293.1970520019531, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -320.0, "rewards_train/1-2": 2.415491819381714, "rewards_train/1-l": -4.455236434936523, "rewards_train/1-w": 5.5677809715271, "rewards_train/2-2": 4.124840259552002, "rewards_train/2-w": 2.5865447521209717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 10.023017406463623, "rewards_train/margins_1": 3.1522891521453857, "rewards_train/margins_2": 1.5382955074310303, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -142.59817504882812, "logps_train/policy_1_l": -183.44580078125, "logps_train/policy_1_w": -130.3383331298828, "logps_train/policy_2_2": -114.68995666503906, "logps_train/policy_2_w": -169.23080444335938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.6108858585357666, "rewards_train/1-l": -2.1052722930908203, "rewards_train/1-w": 2.6216354370117188, "rewards_train/2-2": 2.6825664043426514, "rewards_train/2-w": 1.2976219654083252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.726907730102539, "rewards_train/margins_1": 1.0107495784759521, "rewards_train/margins_2": 1.3849444389343262, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -122.62194061279297, "logps_train/policy_1_l": -134.8964080810547, "logps_train/policy_1_w": -107.09871673583984, "logps_train/policy_2_2": -91.12879943847656, "logps_train/policy_2_w": -141.97064208984375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.5003058910369873, "rewards_train/1-l": -1.6318285465240479, "rewards_train/1-w": 2.999112844467163, "rewards_train/2-2": 2.7570419311523438, "rewards_train/2-w": 1.4443426132202148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.630941390991211, "rewards_train/margins_1": 1.4988069534301758, "rewards_train/margins_2": 1.312699317932129, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -192.0946502685547, "logps_train/policy_1_l": -249.633544921875, "logps_train/policy_1_w": -222.17312622070312, "logps_train/policy_2_2": -153.40261840820312, "logps_train/policy_2_w": -269.3586730957031, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": 2.3264732360839844, "rewards_train/1-l": -2.6758532524108887, "rewards_train/1-w": 4.036789417266846, "rewards_train/2-2": 3.7456750869750977, "rewards_train/2-w": 1.9912800788879395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.712642669677734, "rewards_train/margins_1": 1.7103161811828613, "rewards_train/margins_2": 1.7543950080871582, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -65.77996826171875, "logps_train/policy_1_l": -63.81379699707031, "logps_train/policy_1_w": -46.86533737182617, "logps_train/policy_2_2": -46.741004943847656, "logps_train/policy_2_w": -70.87152862548828, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -49.5, "logps_train/ref_1_w": -67.5, "logps_train/ref_2_2": -66.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 1.193096399307251, "rewards_train/1-l": -1.419856309890747, "rewards_train/1-w": 2.0522115230560303, "rewards_train/2-2": 1.985469937324524, "rewards_train/2-w": 0.8356987237930298, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4720678329467773, "rewards_train/margins_1": 0.8591151237487793, "rewards_train/margins_2": 1.1497712135314941, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -122.19921112060547, "logps_train/policy_1_l": -120.2967300415039, "logps_train/policy_1_w": -63.55927276611328, "logps_train/policy_2_2": -86.5599594116211, "logps_train/policy_2_w": -89.81883239746094, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 0.6144537329673767, "rewards_train/1-l": -2.1865086555480957, "rewards_train/1-w": 2.876885414123535, "rewards_train/2-2": 2.2955660820007324, "rewards_train/2-w": 1.772803544998169, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.063394069671631, "rewards_train/margins_1": 2.2624316811561584, "rewards_train/margins_2": 0.5227625370025635, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -97.47683715820312, "logps_train/policy_1_l": -113.80451965332031, "logps_train/policy_1_w": -69.82479858398438, "logps_train/policy_2_2": -79.86587524414062, "logps_train/policy_2_w": -86.99301147460938, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 1.1720430850982666, "rewards_train/1-l": -1.715608835220337, "rewards_train/1-w": 2.1255273818969727, "rewards_train/2-2": 1.8593109846115112, "rewards_train/2-w": 1.6259920597076416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8411362171173096, "rewards_train/margins_1": 0.953484296798706, "rewards_train/margins_2": 0.23331892490386963, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -129.79562377929688, "logps_train/policy_1_l": -183.4201202392578, "logps_train/policy_1_w": -105.09693908691406, "logps_train/policy_2_2": -99.13241577148438, "logps_train/policy_2_w": -139.13052368164062, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.6298117637634277, "rewards_train/1-l": -3.032832622528076, "rewards_train/1-w": 3.2848377227783203, "rewards_train/2-2": 2.682851791381836, "rewards_train/2-w": 1.9783544540405273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.3176703453063965, "rewards_train/margins_1": 1.6550259590148926, "rewards_train/margins_2": 0.7044973373413086, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -77.6358642578125, "logps_train/policy_1_l": -69.84480285644531, "logps_train/policy_1_w": -52.5662841796875, "logps_train/policy_2_2": -59.02019500732422, "logps_train/policy_2_w": -74.72175598144531, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 1.09735107421875, "rewards_train/1-l": -1.4256911277770996, "rewards_train/1-w": 2.141418933868408, "rewards_train/2-2": 1.8831367492675781, "rewards_train/2-w": 1.473917841911316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.567110061645508, "rewards_train/margins_1": 1.0440678596496582, "rewards_train/margins_2": 0.4092189073562622, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -123.68453979492188, "logps_train/policy_1_l": -135.0167236328125, "logps_train/policy_1_w": -97.93616485595703, "logps_train/policy_2_2": -85.2127685546875, "logps_train/policy_2_w": -134.0350799560547, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.5268577337265015, "rewards_train/1-l": -1.360851764678955, "rewards_train/1-w": 3.1501333713531494, "rewards_train/2-2": 2.633410930633545, "rewards_train/2-w": 1.6027429103851318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.5109851360321045, "rewards_train/margins_1": 1.623275637626648, "rewards_train/margins_2": 1.030668020248413, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -89.20269775390625, "logps_train/policy_1_l": -76.7616958618164, "logps_train/policy_1_w": -49.59716796875, "logps_train/policy_2_2": -63.89777374267578, "logps_train/policy_2_w": -80.42276763916016, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 0.9422299861907959, "rewards_train/1-l": -1.1785131692886353, "rewards_train/1-w": 1.940576195716858, "rewards_train/2-2": 1.8684253692626953, "rewards_train/2-w": 0.9149498343467712, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.119089365005493, "rewards_train/margins_1": 0.998346209526062, "rewards_train/margins_2": 0.9534755349159241, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -195.43798828125, "logps_train/policy_1_l": -211.74241638183594, "logps_train/policy_1_w": -92.77046203613281, "logps_train/policy_2_2": -154.8873291015625, "logps_train/policy_2_w": -112.20047760009766, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.8522964715957642, "rewards_train/1-l": -3.086937427520752, "rewards_train/1-w": 2.3127975463867188, "rewards_train/2-2": 2.768688440322876, "rewards_train/2-w": 1.7424519062042236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.399734973907471, "rewards_train/margins_1": 1.4605010747909546, "rewards_train/margins_2": 1.0262365341186523, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -108.64854431152344, "logps_train/policy_1_l": -160.22508239746094, "logps_train/policy_1_w": -97.62934875488281, "logps_train/policy_2_2": -91.69037628173828, "logps_train/policy_2_w": -137.392822265625, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 2.026552200317383, "rewards_train/1-l": -1.9674304723739624, "rewards_train/1-w": 3.0136280059814453, "rewards_train/2-2": 2.7543997764587402, "rewards_train/2-w": 1.2622809410095215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.981058478355408, "rewards_train/margins_1": 0.9870758056640625, "rewards_train/margins_2": 1.4921188354492188, "step": 405 }, { "epoch": 1.22, "learning_rate": 1.8410320848273315e-06, "loss": 0.4494, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -229.71817016601562, "logps_train/policy_1_l": -243.02545166015625, "logps_train/policy_1_w": -156.68035888671875, "logps_train/policy_2_2": -193.69606018066406, "logps_train/policy_2_w": -201.09735107421875, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 2.0594329833984375, "rewards_train/1-l": -3.4736382961273193, "rewards_train/1-w": 4.010089874267578, "rewards_train/2-2": 3.449143648147583, "rewards_train/2-w": 2.452765941619873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.4837281703948975, "rewards_train/margins_1": 1.9506568908691406, "rewards_train/margins_2": 0.99637770652771, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -187.17208862304688, "logps_train/policy_1_l": -193.053466796875, "logps_train/policy_1_w": -90.40827941894531, "logps_train/policy_2_2": -135.23989868164062, "logps_train/policy_2_w": -141.3881072998047, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.010915994644165, "rewards_train/1-l": -3.589721202850342, "rewards_train/1-w": 3.8779218196868896, "rewards_train/2-2": 3.2228844165802, "rewards_train/2-w": 1.5861893892288208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.4676430225372314, "rewards_train/margins_1": 2.8670058250427246, "rewards_train/margins_2": 1.6366950273513794, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -109.73480987548828, "logps_train/policy_1_l": -139.46490478515625, "logps_train/policy_1_w": -98.29270935058594, "logps_train/policy_2_2": -83.45701599121094, "logps_train/policy_2_w": -131.66726684570312, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.0327692031860352, "rewards_train/1-l": -1.824616551399231, "rewards_train/1-w": 2.4445571899414062, "rewards_train/2-2": 2.207423210144043, "rewards_train/2-w": 1.2184290885925293, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.269173741340637, "rewards_train/margins_1": 1.411787986755371, "rewards_train/margins_2": 0.9889941215515137, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -212.3307342529297, "logps_train/policy_1_l": -164.4834747314453, "logps_train/policy_1_w": -153.94847106933594, "logps_train/policy_2_2": -172.58358764648438, "logps_train/policy_2_w": -195.15408325195312, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.6716148853302002, "rewards_train/1-l": -1.2233473062515259, "rewards_train/1-w": 3.0774192810058594, "rewards_train/2-2": 3.6537506580352783, "rewards_train/2-w": 1.691624402999878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.300766587257385, "rewards_train/margins_1": 1.4058043956756592, "rewards_train/margins_2": 1.9621262550354004, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -102.61514282226562, "logps_train/policy_1_l": -99.1054458618164, "logps_train/policy_1_w": -68.05713653564453, "logps_train/policy_2_2": -67.36161041259766, "logps_train/policy_2_w": -112.33120727539062, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.3966885209083557, "rewards_train/1-l": -2.1763648986816406, "rewards_train/1-w": 2.319286346435547, "rewards_train/2-2": 2.1228232383728027, "rewards_train/2-w": 0.6407074928283691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4956512451171875, "rewards_train/margins_1": 1.9225978255271912, "rewards_train/margins_2": 1.4821157455444336, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -95.87129211425781, "logps_train/policy_1_l": -218.89608764648438, "logps_train/policy_1_w": -75.75178527832031, "logps_train/policy_2_2": -73.22659301757812, "logps_train/policy_2_w": -108.7840576171875, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.3363080024719238, "rewards_train/1-l": -4.006894111633301, "rewards_train/1-w": 2.448259115219116, "rewards_train/2-2": 2.156247138977051, "rewards_train/2-w": 1.0465941429138184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.455153226852417, "rewards_train/margins_1": 1.1119511127471924, "rewards_train/margins_2": 1.1096529960632324, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -165.38064575195312, "logps_train/policy_1_l": -232.80038452148438, "logps_train/policy_1_w": -196.87448120117188, "logps_train/policy_2_2": -136.69500732421875, "logps_train/policy_2_w": -236.91708374023438, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.518185615539551, "rewards_train/1-l": -3.207918643951416, "rewards_train/1-w": 4.2094268798828125, "rewards_train/2-2": 3.805499315261841, "rewards_train/2-w": 2.317666530609131, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.4173455238342285, "rewards_train/margins_1": 1.6912412643432617, "rewards_train/margins_2": 1.48783278465271, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -201.54432678222656, "logps_train/policy_1_l": -201.66552734375, "logps_train/policy_1_w": -125.72076416015625, "logps_train/policy_2_2": -152.00897216796875, "logps_train/policy_2_w": -162.59783935546875, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.015684187412261963, "rewards_train/1-l": -3.473193645477295, "rewards_train/1-w": 4.11053991317749, "rewards_train/2-2": 2.460235118865967, "rewards_train/2-w": 2.9638495445251465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.583733558654785, "rewards_train/margins_1": 4.094855725765228, "rewards_train/margins_2": -0.5036144256591797, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -145.1341552734375, "logps_train/policy_1_l": -178.60040283203125, "logps_train/policy_1_w": -147.04153442382812, "logps_train/policy_2_2": -108.11088562011719, "logps_train/policy_2_w": -190.2738800048828, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.2975208759307861, "rewards_train/1-l": -3.3084778785705566, "rewards_train/1-w": 3.4911599159240723, "rewards_train/2-2": 2.682661771774292, "rewards_train/2-w": 1.5241740942001343, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.799637794494629, "rewards_train/margins_1": 2.193639039993286, "rewards_train/margins_2": 1.1584876775741577, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -146.72512817382812, "logps_train/policy_1_l": -202.52059936523438, "logps_train/policy_1_w": -104.32560729980469, "logps_train/policy_2_2": -107.93959045410156, "logps_train/policy_2_w": -147.07278442382812, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.2181110382080078, "rewards_train/1-l": -2.418757915496826, "rewards_train/1-w": 2.8861896991729736, "rewards_train/2-2": 2.7997913360595703, "rewards_train/2-w": 1.3802223205566406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.3049476146698, "rewards_train/margins_1": 1.6680786609649658, "rewards_train/margins_2": 1.4195690155029297, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -76.7235107421875, "logps_train/policy_1_l": -120.66626739501953, "logps_train/policy_1_w": -111.20729064941406, "logps_train/policy_2_2": -52.33495330810547, "logps_train/policy_2_w": -168.24949645996094, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.8823366165161133, "rewards_train/1-l": -2.10019588470459, "rewards_train/1-w": 3.348802089691162, "rewards_train/2-2": 1.7157237529754639, "rewards_train/2-w": 0.7875504493713379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.448997974395752, "rewards_train/margins_1": 2.466465473175049, "rewards_train/margins_2": 0.928173303604126, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -92.46652221679688, "logps_train/policy_1_l": -124.09941864013672, "logps_train/policy_1_w": -67.0691146850586, "logps_train/policy_2_2": -75.73027801513672, "logps_train/policy_2_w": -83.305419921875, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.6135039329528809, "rewards_train/1-l": -1.5802538394927979, "rewards_train/1-w": 2.2680881023406982, "rewards_train/2-2": 2.194941520690918, "rewards_train/2-w": 1.378051996231079, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.848341941833496, "rewards_train/margins_1": 0.6545841693878174, "rewards_train/margins_2": 0.8168895244598389, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -191.3946990966797, "logps_train/policy_1_l": -218.67002868652344, "logps_train/policy_1_w": -156.48361206054688, "logps_train/policy_2_2": -154.64181518554688, "logps_train/policy_2_w": -204.7838897705078, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.9605302810668945, "rewards_train/1-l": -2.0056748390197754, "rewards_train/1-w": 3.4908978939056396, "rewards_train/2-2": 3.482692241668701, "rewards_train/2-w": 1.6887983083724976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.496572732925415, "rewards_train/margins_1": 1.5303676128387451, "rewards_train/margins_2": 1.7938939332962036, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -177.81362915039062, "logps_train/policy_1_l": -177.72714233398438, "logps_train/policy_1_w": -158.5689697265625, "logps_train/policy_2_2": -140.67776489257812, "logps_train/policy_2_w": -199.88900756835938, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 2.206136703491211, "rewards_train/1-l": -1.413143515586853, "rewards_train/1-w": 3.3899779319763184, "rewards_train/2-2": 3.766599178314209, "rewards_train/2-w": 1.9095380306243896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.803121447563171, "rewards_train/margins_1": 1.1838412284851074, "rewards_train/margins_2": 1.8570611476898193, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -186.42486572265625, "logps_train/policy_1_l": -202.0773162841797, "logps_train/policy_1_w": -85.17185974121094, "logps_train/policy_2_2": -144.24154663085938, "logps_train/policy_2_w": -113.6434097290039, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.3161076307296753, "rewards_train/1-l": -3.1085145473480225, "rewards_train/1-w": 2.8554704189300537, "rewards_train/2-2": 2.8899083137512207, "rewards_train/2-w": 1.7903461456298828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.963984966278076, "rewards_train/margins_1": 1.5393627882003784, "rewards_train/margins_2": 1.099562168121338, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -240.86012268066406, "logps_train/policy_1_l": -175.6473388671875, "logps_train/policy_1_w": -155.09451293945312, "logps_train/policy_2_2": -201.81668090820312, "logps_train/policy_2_w": -194.79592895507812, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.070237636566162, "rewards_train/1-l": -1.7928582429885864, "rewards_train/1-w": 3.684299945831299, "rewards_train/2-2": 4.033958435058594, "rewards_train/2-w": 2.0485310554504395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.477158188819885, "rewards_train/margins_1": 1.6140623092651367, "rewards_train/margins_2": 1.9854273796081543, "step": 407 }, { "epoch": 1.22, "learning_rate": 1.8172400063285423e-06, "loss": 0.4135, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -161.29690551757812, "logps_train/policy_1_l": -233.25241088867188, "logps_train/policy_1_w": -100.90792846679688, "logps_train/policy_2_2": -123.40471649169922, "logps_train/policy_2_w": -146.35987854003906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.3015589714050293, "rewards_train/1-l": -3.0346176624298096, "rewards_train/1-w": 3.0654571056365967, "rewards_train/2-2": 2.4603095054626465, "rewards_train/2-w": 1.0593245029449463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.100074768066406, "rewards_train/margins_1": 1.7638981342315674, "rewards_train/margins_2": 1.4009850025177002, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -230.8580322265625, "logps_train/policy_1_l": -172.503662109375, "logps_train/policy_1_w": -99.12808227539062, "logps_train/policy_2_2": -176.77061462402344, "logps_train/policy_2_w": -129.69117736816406, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.7040412425994873, "rewards_train/1-l": -2.189525604248047, "rewards_train/1-w": 3.0793797969818115, "rewards_train/2-2": 3.99246883392334, "rewards_train/2-w": 1.9886947870254517, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.268905401229858, "rewards_train/margins_1": 1.3753385543823242, "rewards_train/margins_2": 2.003774046897888, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -116.45214080810547, "logps_train/policy_1_l": -231.03538513183594, "logps_train/policy_1_w": -155.2339324951172, "logps_train/policy_2_2": -95.3348388671875, "logps_train/policy_2_w": -189.1904296875, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 2.1422858238220215, "rewards_train/1-l": -3.5722875595092773, "rewards_train/1-w": 2.5836381912231445, "rewards_train/2-2": 2.772765636444092, "rewards_train/2-w": 0.8411136865615845, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.155925750732422, "rewards_train/margins_1": 0.44135236740112305, "rewards_train/margins_2": 1.9316519498825073, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -197.58792114257812, "logps_train/policy_1_l": -256.3082275390625, "logps_train/policy_1_w": -173.05734252929688, "logps_train/policy_2_2": -156.54135131835938, "logps_train/policy_2_w": -219.2632598876953, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.0787079334259033, "rewards_train/1-l": -2.869105815887451, "rewards_train/1-w": 3.3755149841308594, "rewards_train/2-2": 2.6771154403686523, "rewards_train/2-w": 1.492423415184021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.2446208000183105, "rewards_train/margins_1": 2.296807050704956, "rewards_train/margins_2": 1.1846920251846313, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -163.13192749023438, "logps_train/policy_1_l": -205.10739135742188, "logps_train/policy_1_w": -188.83795166015625, "logps_train/policy_2_2": -125.64295196533203, "logps_train/policy_2_w": -238.77423095703125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 1.3348548412322998, "rewards_train/1-l": -2.8333964347839355, "rewards_train/1-w": 3.39198637008667, "rewards_train/2-2": 3.198204517364502, "rewards_train/2-w": 1.1913280487060547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.2253828048706055, "rewards_train/margins_1": 2.05713152885437, "rewards_train/margins_2": 2.0068764686584473, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -159.3271026611328, "logps_train/policy_1_l": -138.69265747070312, "logps_train/policy_1_w": -136.60580444335938, "logps_train/policy_2_2": -130.16046142578125, "logps_train/policy_2_w": -165.05593872070312, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.7075245380401611, "rewards_train/1-l": -1.5499297380447388, "rewards_train/1-w": 3.0925445556640625, "rewards_train/2-2": 2.637176752090454, "rewards_train/2-w": 1.725656270980835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.642474293708801, "rewards_train/margins_1": 1.3850200176239014, "rewards_train/margins_2": 0.9115204811096191, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -149.7728271484375, "logps_train/policy_1_l": -65.5689697265625, "logps_train/policy_1_w": -51.20752716064453, "logps_train/policy_2_2": -112.7488784790039, "logps_train/policy_2_w": -77.13206481933594, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -49.25, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.6766241788864136, "rewards_train/1-l": -1.6315064430236816, "rewards_train/1-w": 2.9964351654052734, "rewards_train/2-2": 2.560659408569336, "rewards_train/2-w": 1.5633562803268433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.627941608428955, "rewards_train/margins_1": 2.31981098651886, "rewards_train/margins_2": 0.9973031282424927, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -122.39309692382812, "logps_train/policy_1_l": -143.50668334960938, "logps_train/policy_1_w": -107.11128234863281, "logps_train/policy_2_2": -91.49085235595703, "logps_train/policy_2_w": -152.927734375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 2.0122530460357666, "rewards_train/1-l": -1.7944180965423584, "rewards_train/1-w": 3.1294970512390137, "rewards_train/2-2": 3.1091179847717285, "rewards_train/2-w": 1.3853510618209839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.923915147781372, "rewards_train/margins_1": 1.117244005203247, "rewards_train/margins_2": 1.7237669229507446, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -140.19580078125, "logps_train/policy_1_l": -165.49176025390625, "logps_train/policy_1_w": -84.12155151367188, "logps_train/policy_2_2": -118.27467346191406, "logps_train/policy_2_w": -104.80856323242188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.268700122833252, "rewards_train/1-l": -2.1134095191955566, "rewards_train/1-w": 2.1533727645874023, "rewards_train/2-2": 2.3397200107574463, "rewards_train/2-w": 1.3531277179718018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.266782283782959, "rewards_train/margins_1": 0.8846726417541504, "rewards_train/margins_2": 0.9865922927856445, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -120.18077087402344, "logps_train/policy_1_l": -139.3222198486328, "logps_train/policy_1_w": -78.73220825195312, "logps_train/policy_2_2": -87.99462127685547, "logps_train/policy_2_w": -138.35986328125, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.7830950021743774, "rewards_train/1-l": -2.5322213172912598, "rewards_train/1-w": 3.1754117012023926, "rewards_train/2-2": 2.277881622314453, "rewards_train/2-w": 0.9810054302215576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.707633018493652, "rewards_train/margins_1": 2.392316699028015, "rewards_train/margins_2": 1.2968761920928955, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -218.6106414794922, "logps_train/policy_1_l": -172.40333557128906, "logps_train/policy_1_w": -134.424072265625, "logps_train/policy_2_2": -163.49661254882812, "logps_train/policy_2_w": -174.37876892089844, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 2.1529974937438965, "rewards_train/1-l": -2.4708027839660645, "rewards_train/1-w": 3.9712657928466797, "rewards_train/2-2": 4.826120376586914, "rewards_train/2-w": 2.458216905593872, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.442068576812744, "rewards_train/margins_1": 1.8182682991027832, "rewards_train/margins_2": 2.367903470993042, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -220.27651977539062, "logps_train/policy_1_l": -133.4174346923828, "logps_train/policy_1_w": -136.60845947265625, "logps_train/policy_2_2": -176.32632446289062, "logps_train/policy_2_w": -180.77713012695312, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.3856298923492432, "rewards_train/1-l": -1.1429893970489502, "rewards_train/1-w": 3.15321683883667, "rewards_train/2-2": 3.738461971282959, "rewards_train/2-w": 1.1441603899002075, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.29620623588562, "rewards_train/margins_1": 1.7675869464874268, "rewards_train/margins_2": 2.5943015813827515, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -168.9367218017578, "logps_train/policy_1_l": -135.72442626953125, "logps_train/policy_1_w": -127.06632995605469, "logps_train/policy_2_2": -148.62112426757812, "logps_train/policy_2_w": -152.31698608398438, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.4957804679870605, "rewards_train/1-l": -1.7398254871368408, "rewards_train/1-w": 2.746101140975952, "rewards_train/2-2": 2.5343713760375977, "rewards_train/2-w": 1.4272857904434204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.485926628112793, "rewards_train/margins_1": 1.2503206729888916, "rewards_train/margins_2": 1.1070855855941772, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -294.6170959472656, "logps_train/policy_1_l": -209.999267578125, "logps_train/policy_1_w": -137.36444091796875, "logps_train/policy_2_2": -212.5338897705078, "logps_train/policy_2_w": -180.0975341796875, "logps_train/ref_1_2": -300.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -255.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.613289475440979, "rewards_train/1-l": -2.317113161087036, "rewards_train/1-w": 3.3932433128356934, "rewards_train/2-2": 4.24661111831665, "rewards_train/2-w": 2.0402467250823975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.7103564739227295, "rewards_train/margins_1": 2.7799538373947144, "rewards_train/margins_2": 2.206364393234253, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -105.11161804199219, "logps_train/policy_1_l": -91.4878921508789, "logps_train/policy_1_w": -63.61745834350586, "logps_train/policy_2_2": -80.33283233642578, "logps_train/policy_2_w": -84.62191772460938, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.392745018005371, "rewards_train/1-l": -2.1417579650878906, "rewards_train/1-w": 2.767160415649414, "rewards_train/2-2": 2.1784353256225586, "rewards_train/2-w": 1.6510894298553467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.908918380737305, "rewards_train/margins_1": 1.374415397644043, "rewards_train/margins_2": 0.5273458957672119, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -191.21017456054688, "logps_train/policy_1_l": -199.95388793945312, "logps_train/policy_1_w": -111.78681945800781, "logps_train/policy_2_2": -149.77108764648438, "logps_train/policy_2_w": -156.3661651611328, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.363357424736023, "rewards_train/1-l": -2.241483211517334, "rewards_train/1-w": 3.882255792617798, "rewards_train/2-2": 3.1197664737701416, "rewards_train/2-w": 2.122758388519287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.123739004135132, "rewards_train/margins_1": 2.518898367881775, "rewards_train/margins_2": 0.9970080852508545, "step": 409 }, { "epoch": 1.23, "learning_rate": 1.793514564001503e-06, "loss": 0.4369, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -216.63818359375, "logps_train/policy_1_l": -186.72650146484375, "logps_train/policy_1_w": -117.59440612792969, "logps_train/policy_2_2": -155.2633056640625, "logps_train/policy_2_w": -191.7208709716797, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 2.2424330711364746, "rewards_train/1-l": -2.7386672496795654, "rewards_train/1-w": 3.492121458053589, "rewards_train/2-2": 4.44866943359375, "rewards_train/2-w": 0.9841635823249817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.230788707733154, "rewards_train/margins_1": 1.2496883869171143, "rewards_train/margins_2": 3.4645058512687683, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -210.652587890625, "logps_train/policy_1_l": -152.31060791015625, "logps_train/policy_1_w": -66.90734100341797, "logps_train/policy_2_2": -155.52993774414062, "logps_train/policy_2_w": -95.44754028320312, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.907007098197937, "rewards_train/1-l": -2.622272491455078, "rewards_train/1-w": 2.1709845066070557, "rewards_train/2-2": 3.2662947177886963, "rewards_train/2-w": 1.5052456855773926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.793256998062134, "rewards_train/margins_1": 1.2639774084091187, "rewards_train/margins_2": 1.7610490322113037, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -96.79057312011719, "logps_train/policy_1_l": -146.94467163085938, "logps_train/policy_1_w": -100.91841125488281, "logps_train/policy_2_2": -73.90592956542969, "logps_train/policy_2_w": -139.77760314941406, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.095942735671997, "rewards_train/1-l": -2.5639984607696533, "rewards_train/1-w": 2.621049404144287, "rewards_train/2-2": 1.9414377212524414, "rewards_train/2-w": 0.9105206727981567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.18504786491394, "rewards_train/margins_1": 1.52510666847229, "rewards_train/margins_2": 1.0309170484542847, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -143.95989990234375, "logps_train/policy_1_l": -189.22225952148438, "logps_train/policy_1_w": -96.78959655761719, "logps_train/policy_2_2": -106.97328186035156, "logps_train/policy_2_w": -132.50277709960938, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6274480819702148, "rewards_train/1-l": -2.4648027420043945, "rewards_train/1-w": 2.9366660118103027, "rewards_train/2-2": 2.5350944995880127, "rewards_train/2-w": 1.5965982675552368, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.401468753814697, "rewards_train/margins_1": 1.309217929840088, "rewards_train/margins_2": 0.9384962320327759, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -137.21014404296875, "logps_train/policy_1_l": -188.5291748046875, "logps_train/policy_1_w": -158.18504333496094, "logps_train/policy_2_2": -109.7821044921875, "logps_train/policy_2_w": -192.25314331054688, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.238361120223999, "rewards_train/1-l": -2.4576048851013184, "rewards_train/1-w": 3.3537607192993164, "rewards_train/2-2": 3.193665027618408, "rewards_train/2-w": 1.6278105974197388, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.811365604400635, "rewards_train/margins_1": 1.1153995990753174, "rewards_train/margins_2": 1.5658544301986694, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -144.2636260986328, "logps_train/policy_1_l": -157.74740600585938, "logps_train/policy_1_w": -146.08741760253906, "logps_train/policy_2_2": -105.61634826660156, "logps_train/policy_2_w": -194.3016815185547, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.0267624855041504, "rewards_train/1-l": -1.4210293292999268, "rewards_train/1-w": 3.6537585258483887, "rewards_train/2-2": 3.116489887237549, "rewards_train/2-w": 1.760455846786499, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.074787855148315, "rewards_train/margins_1": 1.6269960403442383, "rewards_train/margins_2": 1.3560340404510498, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -230.4941864013672, "logps_train/policy_1_l": -234.97271728515625, "logps_train/policy_1_w": -158.57186889648438, "logps_train/policy_2_2": -192.26763916015625, "logps_train/policy_2_w": -204.01962280273438, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.4818310737609863, "rewards_train/1-l": -2.3824281692504883, "rewards_train/1-w": 4.0693769454956055, "rewards_train/2-2": 3.6638622283935547, "rewards_train/2-w": 2.537099838256836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.451805114746094, "rewards_train/margins_1": 1.5875458717346191, "rewards_train/margins_2": 1.1267623901367188, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -247.1728057861328, "logps_train/policy_1_l": -246.31301879882812, "logps_train/policy_1_w": -161.93711853027344, "logps_train/policy_2_2": -197.4326171875, "logps_train/policy_2_w": -204.83963012695312, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 2.3194384574890137, "rewards_train/1-l": -3.4328651428222656, "rewards_train/1-w": 3.7797253131866455, "rewards_train/2-2": 4.425975322723389, "rewards_train/2-w": 2.1277565956115723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.212590456008911, "rewards_train/margins_1": 1.4602868556976318, "rewards_train/margins_2": 2.2982187271118164, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -141.6595458984375, "logps_train/policy_1_l": -129.42539978027344, "logps_train/policy_1_w": -134.7384490966797, "logps_train/policy_2_2": -107.43862915039062, "logps_train/policy_2_w": -183.08242797851562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.2508412599563599, "rewards_train/1-l": -1.766758680343628, "rewards_train/1-w": 3.017561912536621, "rewards_train/2-2": 2.641683340072632, "rewards_train/2-w": 0.5319900512695312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.784320592880249, "rewards_train/margins_1": 1.7667206525802612, "rewards_train/margins_2": 2.1096932888031006, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -160.99493408203125, "logps_train/policy_1_l": -142.86795043945312, "logps_train/policy_1_w": -144.92633056640625, "logps_train/policy_2_2": -121.30599975585938, "logps_train/policy_2_w": -188.26422119140625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.9848812818527222, "rewards_train/1-l": -1.7930448055267334, "rewards_train/1-w": 3.1706490516662598, "rewards_train/2-2": 3.201432228088379, "rewards_train/2-w": 1.3798277378082275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.963693857192993, "rewards_train/margins_1": 1.1857677698135376, "rewards_train/margins_2": 1.8216044902801514, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -168.75209045410156, "logps_train/policy_1_l": -221.8375244140625, "logps_train/policy_1_w": -166.00765991210938, "logps_train/policy_2_2": -134.1571502685547, "logps_train/policy_2_w": -215.02093505859375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.2599480152130127, "rewards_train/1-l": -2.98834228515625, "rewards_train/1-w": 2.7308731079101562, "rewards_train/2-2": 2.870612382888794, "rewards_train/2-w": 0.35259366035461426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.719215393066406, "rewards_train/margins_1": 1.4709250926971436, "rewards_train/margins_2": 2.5180187225341797, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -149.4127197265625, "logps_train/policy_1_l": -202.47027587890625, "logps_train/policy_1_w": -104.30087280273438, "logps_train/policy_2_2": -116.63507080078125, "logps_train/policy_2_w": -139.5138702392578, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.1712287664413452, "rewards_train/1-l": -2.4829649925231934, "rewards_train/1-w": 2.8605382442474365, "rewards_train/2-2": 2.836493492126465, "rewards_train/2-w": 1.6314252614974976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.34350323677063, "rewards_train/margins_1": 1.6893094778060913, "rewards_train/margins_2": 1.2050682306289673, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -200.839111328125, "logps_train/policy_1_l": -161.31588745117188, "logps_train/policy_1_w": -77.98635864257812, "logps_train/policy_2_2": -152.3755645751953, "logps_train/policy_2_w": -111.98187255859375, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.8129649758338928, "rewards_train/1-l": -2.4048309326171875, "rewards_train/1-w": 2.951364040374756, "rewards_train/2-2": 3.3030686378479004, "rewards_train/2-w": 1.7565006017684937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.356194972991943, "rewards_train/margins_1": 2.138399064540863, "rewards_train/margins_2": 1.5465680360794067, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -167.73443603515625, "logps_train/policy_1_l": -185.87548828125, "logps_train/policy_1_w": -147.5562286376953, "logps_train/policy_2_2": -128.3529510498047, "logps_train/policy_2_w": -190.16859436035156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.42733895778656, "rewards_train/1-l": -2.93442440032959, "rewards_train/1-w": 3.800626754760742, "rewards_train/2-2": 2.738337516784668, "rewards_train/2-w": 1.9800149202346802, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.735051155090332, "rewards_train/margins_1": 2.373287796974182, "rewards_train/margins_2": 0.7583225965499878, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -126.42264556884766, "logps_train/policy_1_l": -138.0481719970703, "logps_train/policy_1_w": -89.69811248779297, "logps_train/policy_2_2": -85.44573211669922, "logps_train/policy_2_w": -137.57669067382812, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.1389853954315186, "rewards_train/1-l": -1.9173176288604736, "rewards_train/1-w": 2.8630013465881348, "rewards_train/2-2": 2.4515204429626465, "rewards_train/2-w": 0.7669403553009033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.780318975448608, "rewards_train/margins_1": 1.7240159511566162, "rewards_train/margins_2": 1.6845800876617432, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -111.30178833007812, "logps_train/policy_1_l": -89.8155517578125, "logps_train/policy_1_w": -78.69685363769531, "logps_train/policy_2_2": -92.509521484375, "logps_train/policy_2_w": -111.63999938964844, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.1799771785736084, "rewards_train/1-l": -1.7850712537765503, "rewards_train/1-w": 2.991643190383911, "rewards_train/2-2": 2.1412353515625, "rewards_train/2-w": 1.5098276138305664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.776714444160461, "rewards_train/margins_1": 1.8116660118103027, "rewards_train/margins_2": 0.6314077377319336, "step": 411 }, { "epoch": 1.23, "learning_rate": 1.769858073407561e-06, "loss": 0.4198, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -96.92680358886719, "logps_train/policy_1_l": -120.25886535644531, "logps_train/policy_1_w": -60.666908264160156, "logps_train/policy_2_2": -66.18814086914062, "logps_train/policy_2_w": -98.60717010498047, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.1026325225830078, "rewards_train/1-l": -1.5176843404769897, "rewards_train/1-w": 2.5684657096862793, "rewards_train/2-2": 2.2108726501464844, "rewards_train/2-w": 1.0338139533996582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.086150050163269, "rewards_train/margins_1": 1.4658331871032715, "rewards_train/margins_2": 1.1770586967468262, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -195.23025512695312, "logps_train/policy_1_l": -139.1151580810547, "logps_train/policy_1_w": -146.0681610107422, "logps_train/policy_2_2": -154.2869110107422, "logps_train/policy_2_w": -178.09133911132812, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 0.7436479330062866, "rewards_train/1-l": -1.453434944152832, "rewards_train/1-w": 4.543184757232666, "rewards_train/2-2": 2.5812716484069824, "rewards_train/2-w": 3.0986783504486084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.996619701385498, "rewards_train/margins_1": 3.7995368242263794, "rewards_train/margins_2": -0.517406702041626, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -95.81006622314453, "logps_train/policy_1_l": -111.10394287109375, "logps_train/policy_1_w": -84.2774429321289, "logps_train/policy_2_2": -73.71320343017578, "logps_train/policy_2_w": -112.9222412109375, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.9827141761779785, "rewards_train/1-l": -2.0307066440582275, "rewards_train/1-w": 2.9292869567871094, "rewards_train/2-2": 2.2428886890411377, "rewards_train/2-w": 2.1218388080596924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.959993600845337, "rewards_train/margins_1": 1.9465727806091309, "rewards_train/margins_2": 0.12104988098144531, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -160.66326904296875, "logps_train/policy_1_l": -154.65528869628906, "logps_train/policy_1_w": -109.42557525634766, "logps_train/policy_2_2": -133.08062744140625, "logps_train/policy_2_w": -140.95928955078125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.6938304901123047, "rewards_train/1-l": -2.1288094520568848, "rewards_train/1-w": 2.50431752204895, "rewards_train/2-2": 2.706000328063965, "rewards_train/2-w": 1.2907898426055908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.633126974105835, "rewards_train/margins_1": 0.8104870319366455, "rewards_train/margins_2": 1.415210485458374, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -106.78749084472656, "logps_train/policy_1_l": -174.17742919921875, "logps_train/policy_1_w": -126.53199768066406, "logps_train/policy_2_2": -87.63793182373047, "logps_train/policy_2_w": -181.01283264160156, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.4251573085784912, "rewards_train/1-l": -2.2521190643310547, "rewards_train/1-w": 2.8128161430358887, "rewards_train/2-2": 2.15378475189209, "rewards_train/2-w": 1.181528925895691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.064935207366943, "rewards_train/margins_1": 1.3876588344573975, "rewards_train/margins_2": 0.9722558259963989, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -153.48106384277344, "logps_train/policy_1_l": -218.36575317382812, "logps_train/policy_1_w": -154.03533935546875, "logps_train/policy_2_2": -106.98890686035156, "logps_train/policy_2_w": -195.55462646484375, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.169863224029541, "rewards_train/1-l": -3.1334500312805176, "rewards_train/1-w": 3.90584135055542, "rewards_train/2-2": 2.658921957015991, "rewards_train/2-w": 2.3007869720458984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.0392913818359375, "rewards_train/margins_1": 2.735978126525879, "rewards_train/margins_2": 0.3581349849700928, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -73.0784912109375, "logps_train/policy_1_l": -75.09858703613281, "logps_train/policy_1_w": -70.45915222167969, "logps_train/policy_2_2": -54.82754898071289, "logps_train/policy_2_w": -100.61981964111328, "logps_train/ref_1_2": -80.5, "logps_train/ref_1_l": -63.25, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 0.7628540396690369, "rewards_train/1-l": -1.173726201057434, "rewards_train/1-w": 2.1322100162506104, "rewards_train/2-2": 1.5684170722961426, "rewards_train/2-w": 0.8786430954933167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3059362173080444, "rewards_train/margins_1": 1.3693559765815735, "rewards_train/margins_2": 0.6897739768028259, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -57.67997360229492, "logps_train/policy_1_l": -65.50804901123047, "logps_train/policy_1_w": -72.00273132324219, "logps_train/policy_2_2": -35.30359649658203, "logps_train/policy_2_w": -104.79961395263672, "logps_train/ref_1_2": -66.0, "logps_train/ref_1_l": -57.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -52.25, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8273151516914368, "rewards_train/1-l": -0.8390862345695496, "rewards_train/1-w": 2.587031841278076, "rewards_train/2-2": 1.696984052658081, "rewards_train/2-w": 1.1301952600479126, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4261180758476257, "rewards_train/margins_1": 1.7597166895866394, "rewards_train/margins_2": 0.5667887926101685, "step": 412 }, { "epoch": 1.24, "logps_train/policy_1_2": -131.51841735839844, "logps_train/policy_1_l": -165.91822814941406, "logps_train/policy_1_w": -76.6953125, "logps_train/policy_2_2": -109.33243560791016, "logps_train/policy_2_w": -109.44098663330078, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 2.032533645629883, "rewards_train/1-l": -2.9879167079925537, "rewards_train/1-w": 2.8023440837860107, "rewards_train/2-2": 2.8628501892089844, "rewards_train/2-w": 1.4340258836746216, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.7902607917785645, "rewards_train/margins_1": 0.7698104381561279, "rewards_train/margins_2": 1.4288243055343628, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -162.08474731445312, "logps_train/policy_1_l": -174.3665008544922, "logps_train/policy_1_w": -91.28096771240234, "logps_train/policy_2_2": -110.73111724853516, "logps_train/policy_2_w": -144.3441925048828, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0712130069732666, "rewards_train/1-l": -2.5737600326538086, "rewards_train/1-w": 3.1969032287597656, "rewards_train/2-2": 2.8565759658813477, "rewards_train/2-w": 1.418705940246582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.770663261413574, "rewards_train/margins_1": 2.125690221786499, "rewards_train/margins_2": 1.4378700256347656, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -321.0780334472656, "logps_train/policy_1_l": -270.36004638671875, "logps_train/policy_1_w": -145.8832550048828, "logps_train/policy_2_2": -235.39950561523438, "logps_train/policy_2_w": -194.73007202148438, "logps_train/ref_1_2": -324.0, "logps_train/ref_1_l": -243.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -272.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 0.35157108306884766, "rewards_train/1-l": -2.7361981868743896, "rewards_train/1-w": 3.6897993087768555, "rewards_train/2-2": 3.766300678253174, "rewards_train/2-w": 2.0100982189178467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.425997495651245, "rewards_train/margins_1": 3.338228225708008, "rewards_train/margins_2": 1.7562024593353271, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -113.55078887939453, "logps_train/policy_1_l": -127.28865814208984, "logps_train/policy_1_w": -97.80050659179688, "logps_train/policy_2_2": -86.89326477050781, "logps_train/policy_2_w": -133.807861328125, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.4980459213256836, "rewards_train/1-l": -1.7378504276275635, "rewards_train/1-w": 3.3121373653411865, "rewards_train/2-2": 2.409111499786377, "rewards_train/2-w": 1.8301522731781006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.04998779296875, "rewards_train/margins_1": 1.814091444015503, "rewards_train/margins_2": 0.5789592266082764, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -87.10380554199219, "logps_train/policy_1_l": -114.4024429321289, "logps_train/policy_1_w": -58.16378402709961, "logps_train/policy_2_2": -63.483558654785156, "logps_train/policy_2_w": -92.0995864868164, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 1.2739949226379395, "rewards_train/1-l": -1.6820898056030273, "rewards_train/1-w": 1.8088657855987549, "rewards_train/2-2": 2.135287046432495, "rewards_train/2-w": 0.8251482844352722, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4909555912017822, "rewards_train/margins_1": 0.5348708629608154, "rewards_train/margins_2": 1.310138761997223, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -156.73883056640625, "logps_train/policy_1_l": -172.56106567382812, "logps_train/policy_1_w": -160.36495971679688, "logps_train/policy_2_2": -122.88645935058594, "logps_train/policy_2_w": -194.4825439453125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.873772382736206, "rewards_train/1-l": -2.556352138519287, "rewards_train/1-w": 3.8133084774017334, "rewards_train/2-2": 3.212916851043701, "rewards_train/2-w": 2.318934440612793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.3696606159210205, "rewards_train/margins_1": 1.9395360946655273, "rewards_train/margins_2": 0.8939824104309082, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -205.87380981445312, "logps_train/policy_1_l": -167.11878967285156, "logps_train/policy_1_w": -119.90133666992188, "logps_train/policy_2_2": -138.8722381591797, "logps_train/policy_2_w": -172.09805297851562, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.105587363243103, "rewards_train/1-l": -3.1458640098571777, "rewards_train/1-w": 3.2153351306915283, "rewards_train/2-2": 3.4893393516540527, "rewards_train/2-w": 0.6698836088180542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.361199140548706, "rewards_train/margins_1": 2.1097477674484253, "rewards_train/margins_2": 2.8194557428359985, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -182.69871520996094, "logps_train/policy_1_l": -270.67572021484375, "logps_train/policy_1_w": -160.5955047607422, "logps_train/policy_2_2": -143.09185791015625, "logps_train/policy_2_w": -208.91845703125, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.0949723720550537, "rewards_train/1-l": -3.4136643409729004, "rewards_train/1-w": 4.092013359069824, "rewards_train/2-2": 3.477337598800659, "rewards_train/2-w": 2.266749382019043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.505677700042725, "rewards_train/margins_1": 1.9970409870147705, "rewards_train/margins_2": 1.2105882167816162, "step": 413 }, { "epoch": 1.24, "learning_rate": 1.746272843378493e-06, "loss": 0.4987, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -192.8525848388672, "logps_train/policy_1_l": -168.55728149414062, "logps_train/policy_1_w": -108.17881774902344, "logps_train/policy_2_2": -149.81976318359375, "logps_train/policy_2_w": -144.92074584960938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.1678670644760132, "rewards_train/1-l": -2.649477958679199, "rewards_train/1-w": 4.053993225097656, "rewards_train/2-2": 2.7133359909057617, "rewards_train/2-w": 2.729799509048462, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.7034711837768555, "rewards_train/margins_1": 2.886126160621643, "rewards_train/margins_2": -0.016463518142700195, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -153.8775634765625, "logps_train/policy_1_l": -155.19778442382812, "logps_train/policy_1_w": -129.36996459960938, "logps_train/policy_2_2": -124.24978637695312, "logps_train/policy_2_w": -171.53353881835938, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.2434937953948975, "rewards_train/1-l": -2.2963407039642334, "rewards_train/1-w": 3.708315372467041, "rewards_train/2-2": 3.5812721252441406, "rewards_train/2-w": 2.0560197830200195, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.004656076431274, "rewards_train/margins_1": 1.4648215770721436, "rewards_train/margins_2": 1.525252342224121, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -129.25320434570312, "logps_train/policy_1_l": -178.46795654296875, "logps_train/policy_1_w": -127.27032470703125, "logps_train/policy_2_2": -93.42618560791016, "logps_train/policy_2_w": -167.3939208984375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0996804237365723, "rewards_train/1-l": -1.9417657852172852, "rewards_train/1-w": 2.792449474334717, "rewards_train/2-2": 2.146444082260132, "rewards_train/2-w": 1.1319942474365234, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.734215259552002, "rewards_train/margins_1": 1.6927690505981445, "rewards_train/margins_2": 1.0144498348236084, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -135.37289428710938, "logps_train/policy_1_l": -89.96097564697266, "logps_train/policy_1_w": -85.10597229003906, "logps_train/policy_2_2": -115.69414520263672, "logps_train/policy_2_w": -109.87269592285156, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.4775543212890625, "rewards_train/1-l": -1.173245906829834, "rewards_train/1-w": 2.4128894805908203, "rewards_train/2-2": 2.3532419204711914, "rewards_train/2-w": 1.300230622291565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5861353874206543, "rewards_train/margins_1": 0.9353351593017578, "rewards_train/margins_2": 1.0530112981796265, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -205.69241333007812, "logps_train/policy_1_l": -185.57936096191406, "logps_train/policy_1_w": -126.28995513916016, "logps_train/policy_2_2": -172.92172241210938, "logps_train/policy_2_w": -162.9004669189453, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 2.6838839054107666, "rewards_train/1-l": -3.030592441558838, "rewards_train/1-w": 3.1272549629211426, "rewards_train/2-2": 4.057826995849609, "rewards_train/2-w": 1.9302661418914795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1578474044799805, "rewards_train/margins_1": 0.443371057510376, "rewards_train/margins_2": 2.12756085395813, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -177.902587890625, "logps_train/policy_1_l": -188.87258911132812, "logps_train/policy_1_w": -130.45436096191406, "logps_train/policy_2_2": -135.1908721923828, "logps_train/policy_2_w": -168.19454956054688, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.455836296081543, "rewards_train/1-l": -2.5085489749908447, "rewards_train/1-w": 3.4600329399108887, "rewards_train/2-2": 2.9906790256500244, "rewards_train/2-w": 1.6203875541687012, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.968581914901733, "rewards_train/margins_1": 2.0041966438293457, "rewards_train/margins_2": 1.3702914714813232, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -200.9183807373047, "logps_train/policy_1_l": -276.01971435546875, "logps_train/policy_1_w": -141.8693389892578, "logps_train/policy_2_2": -160.48538208007812, "logps_train/policy_2_w": -202.28366088867188, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.1331616640090942, "rewards_train/1-l": -4.151579856872559, "rewards_train/1-w": 3.4708783626556396, "rewards_train/2-2": 3.0592727661132812, "rewards_train/2-w": 1.6185083389282227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.622458219528198, "rewards_train/margins_1": 2.3377166986465454, "rewards_train/margins_2": 1.4407644271850586, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -96.62162780761719, "logps_train/policy_1_l": -60.916961669921875, "logps_train/policy_1_w": -53.28974533081055, "logps_train/policy_2_2": -59.55080795288086, "logps_train/policy_2_w": -85.31852722167969, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -47.75, "logps_train/ref_1_w": -70.0, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 1.018306016921997, "rewards_train/1-l": -1.320504903793335, "rewards_train/1-w": 1.6747362613677979, "rewards_train/2-2": 2.305075168609619, "rewards_train/2-w": 0.42674142122268677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.995241165161133, "rewards_train/margins_1": 0.6564302444458008, "rewards_train/margins_2": 1.8783337473869324, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -79.72359466552734, "logps_train/policy_1_l": -151.46707153320312, "logps_train/policy_1_w": -88.32867431640625, "logps_train/policy_2_2": -49.00111389160156, "logps_train/policy_2_w": -136.03102111816406, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.7385780811309814, "rewards_train/1-l": -2.9513938426971436, "rewards_train/1-w": 2.9335389137268066, "rewards_train/2-2": 1.8280136585235596, "rewards_train/2-w": 0.5707263350486755, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.88493275642395, "rewards_train/margins_1": 2.194960832595825, "rewards_train/margins_2": 1.257287323474884, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -85.93406677246094, "logps_train/policy_1_l": -185.4837646484375, "logps_train/policy_1_w": -90.33228302001953, "logps_train/policy_2_2": -70.49820709228516, "logps_train/policy_2_w": -126.03939819335938, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.6659679412841797, "rewards_train/1-l": -2.964782953262329, "rewards_train/1-w": 2.357689380645752, "rewards_train/2-2": 1.483773112297058, "rewards_train/2-w": 0.97965407371521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.322472333908081, "rewards_train/margins_1": 1.6917214393615723, "rewards_train/margins_2": 0.5041190385818481, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -223.93045043945312, "logps_train/policy_1_l": -210.92416381835938, "logps_train/policy_1_w": -130.3551025390625, "logps_train/policy_2_2": -163.48538208007812, "logps_train/policy_2_w": -186.73037719726562, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.0257058143615723, "rewards_train/1-l": -2.8361663818359375, "rewards_train/1-w": 3.459803342819214, "rewards_train/2-2": 3.8827123641967773, "rewards_train/2-w": 1.16914963722229, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.295969724655151, "rewards_train/margins_1": 2.4340975284576416, "rewards_train/margins_2": 2.7135627269744873, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -153.4019012451172, "logps_train/policy_1_l": -143.5099639892578, "logps_train/policy_1_w": -96.70480346679688, "logps_train/policy_2_2": -126.39437866210938, "logps_train/policy_2_w": -118.58029174804688, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.5973100662231445, "rewards_train/1-l": -1.9771685600280762, "rewards_train/1-w": 2.3830349445343018, "rewards_train/2-2": 2.7902491092681885, "rewards_train/2-w": 1.4544707536697388, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.360203504562378, "rewards_train/margins_1": 0.7857248783111572, "rewards_train/margins_2": 1.3357783555984497, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -109.72256469726562, "logps_train/policy_1_l": -169.41287231445312, "logps_train/policy_1_w": -111.21977233886719, "logps_train/policy_2_2": -80.09104919433594, "logps_train/policy_2_w": -143.23458862304688, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.9722749590873718, "rewards_train/1-l": -3.462480068206787, "rewards_train/1-w": 2.4418888092041016, "rewards_train/2-2": 2.0035910606384277, "rewards_train/2-w": 1.3764430284500122, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.904368877410889, "rewards_train/margins_1": 1.4696138501167297, "rewards_train/margins_2": 0.6271480321884155, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -139.39305114746094, "logps_train/policy_1_l": -154.8985137939453, "logps_train/policy_1_w": -108.97354125976562, "logps_train/policy_2_2": -98.68566131591797, "logps_train/policy_2_w": -162.97006225585938, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.538038969039917, "rewards_train/1-l": -2.470319986343384, "rewards_train/1-w": 4.053426742553711, "rewards_train/2-2": 2.888465404510498, "rewards_train/2-w": 1.1881518363952637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.523746728897095, "rewards_train/margins_1": 2.515387773513794, "rewards_train/margins_2": 1.7003135681152344, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -153.04791259765625, "logps_train/policy_1_l": -157.66944885253906, "logps_train/policy_1_w": -189.11001586914062, "logps_train/policy_2_2": -120.59646606445312, "logps_train/policy_2_w": -223.78900146484375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 3.1366162300109863, "rewards_train/1-l": -2.802443742752075, "rewards_train/1-w": 3.723372459411621, "rewards_train/2-2": 3.9880099296569824, "rewards_train/2-w": 1.9109439849853516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.525816202163696, "rewards_train/margins_1": 0.5867562294006348, "rewards_train/margins_2": 2.077065944671631, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -192.58932495117188, "logps_train/policy_1_l": -250.706787109375, "logps_train/policy_1_w": -103.22722625732422, "logps_train/policy_2_2": -145.25088500976562, "logps_train/policy_2_w": -152.42669677734375, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.9777867794036865, "rewards_train/1-l": -3.5371828079223633, "rewards_train/1-w": 3.0561835765838623, "rewards_train/2-2": 3.5803802013397217, "rewards_train/2-w": 1.6662182807922363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.593366384506226, "rewards_train/margins_1": 1.0783967971801758, "rewards_train/margins_2": 1.9141619205474854, "step": 415 }, { "epoch": 1.25, "learning_rate": 1.7227611757911721e-06, "loss": 0.5003, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -161.90634155273438, "logps_train/policy_1_l": -198.73558044433594, "logps_train/policy_1_w": -173.49072265625, "logps_train/policy_2_2": -129.1329345703125, "logps_train/policy_2_w": -212.45022583007812, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.7925682067871094, "rewards_train/1-l": -3.1895737648010254, "rewards_train/1-w": 4.38803768157959, "rewards_train/2-2": 2.828894853591919, "rewards_train/2-w": 2.314354181289673, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.577611446380615, "rewards_train/margins_1": 2.5954694747924805, "rewards_train/margins_2": 0.5145406723022461, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -112.55488586425781, "logps_train/policy_1_l": -121.00877380371094, "logps_train/policy_1_w": -106.19212341308594, "logps_train/policy_2_2": -88.76411437988281, "logps_train/policy_2_w": -134.79660034179688, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.3273248672485352, "rewards_train/1-l": -2.5469717979431152, "rewards_train/1-w": 3.267115831375122, "rewards_train/2-2": 2.3310108184814453, "rewards_train/2-w": 1.871901273727417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.814087629318237, "rewards_train/margins_1": 1.939790964126587, "rewards_train/margins_2": 0.4591095447540283, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -162.81094360351562, "logps_train/policy_1_l": -177.55361938476562, "logps_train/policy_1_w": -107.76760864257812, "logps_train/policy_2_2": -122.34190368652344, "logps_train/policy_2_w": -138.58038330078125, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.2267189025878906, "rewards_train/1-l": -3.0037999153137207, "rewards_train/1-w": 3.6060516834259033, "rewards_train/2-2": 2.9040913581848145, "rewards_train/2-w": 2.420086622238159, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 6.609851598739624, "rewards_train/margins_1": 2.3793327808380127, "rewards_train/margins_2": 0.4840047359466553, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -76.35781860351562, "logps_train/policy_1_l": -96.56816101074219, "logps_train/policy_1_w": -79.29481506347656, "logps_train/policy_2_2": -55.91215515136719, "logps_train/policy_2_w": -104.3633804321289, "logps_train/ref_1_2": -83.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.7314055562019348, "rewards_train/1-l": -1.526737928390503, "rewards_train/1-w": 2.108799934387207, "rewards_train/2-2": 1.434565782546997, "rewards_train/2-w": 1.1785054206848145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.63553786277771, "rewards_train/margins_1": 1.3773943781852722, "rewards_train/margins_2": 0.2560603618621826, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -175.13790893554688, "logps_train/policy_1_l": -228.98594665527344, "logps_train/policy_1_w": -129.57281494140625, "logps_train/policy_2_2": -138.4117431640625, "logps_train/policy_2_w": -163.24276733398438, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.8471468687057495, "rewards_train/1-l": -3.0403919219970703, "rewards_train/1-w": 2.616546154022217, "rewards_train/2-2": 3.4869515895843506, "rewards_train/2-w": 1.585097312927246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.656938076019287, "rewards_train/margins_1": 0.7693992853164673, "rewards_train/margins_2": 1.9018542766571045, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -173.1624755859375, "logps_train/policy_1_l": -210.28433227539062, "logps_train/policy_1_w": -98.17070007324219, "logps_train/policy_2_2": -131.30825805664062, "logps_train/policy_2_w": -128.80323791503906, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.2400028705596924, "rewards_train/1-l": -2.807926654815674, "rewards_train/1-w": 2.641523838043213, "rewards_train/2-2": 3.026986598968506, "rewards_train/2-w": 1.5642073154449463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.449450492858887, "rewards_train/margins_1": 1.4015209674835205, "rewards_train/margins_2": 1.4627792835235596, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -69.98695373535156, "logps_train/policy_1_l": -126.39662170410156, "logps_train/policy_1_w": -81.49079132080078, "logps_train/policy_2_2": -49.02222442626953, "logps_train/policy_2_w": -106.4445571899414, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 1.535679817199707, "rewards_train/1-l": -2.095130205154419, "rewards_train/1-w": 2.4000420570373535, "rewards_train/2-2": 2.1227777004241943, "rewards_train/2-w": 1.519802212715149, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4951722621917725, "rewards_train/margins_1": 0.8643622398376465, "rewards_train/margins_2": 0.6029754877090454, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -185.69357299804688, "logps_train/policy_1_l": -143.66726684570312, "logps_train/policy_1_w": -83.03431701660156, "logps_train/policy_2_2": -128.96734619140625, "logps_train/policy_2_w": -115.98198699951172, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.07092094421386719, "rewards_train/1-l": -1.7007102966308594, "rewards_train/1-w": 3.0504746437072754, "rewards_train/2-2": 2.0997495651245117, "rewards_train/2-w": 1.8111765384674072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.751184940338135, "rewards_train/margins_1": 3.1213955879211426, "rewards_train/margins_2": 0.2885730266571045, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -179.53057861328125, "logps_train/policy_1_l": -184.0303497314453, "logps_train/policy_1_w": -180.35626220703125, "logps_train/policy_2_2": -148.48291015625, "logps_train/policy_2_w": -215.00210571289062, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 2.6164731979370117, "rewards_train/1-l": -1.8579176664352417, "rewards_train/1-w": 4.363592624664307, "rewards_train/2-2": 3.5220208168029785, "rewards_train/2-w": 2.9976401329040527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.221510291099548, "rewards_train/margins_1": 1.747119426727295, "rewards_train/margins_2": 0.5243806838989258, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -175.180908203125, "logps_train/policy_1_l": -208.3994903564453, "logps_train/policy_1_w": -116.48905181884766, "logps_train/policy_2_2": -142.25021362304688, "logps_train/policy_2_w": -173.93710327148438, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.0756607055664062, "rewards_train/1-l": -3.1915111541748047, "rewards_train/1-w": 4.016719341278076, "rewards_train/2-2": 3.4531030654907227, "rewards_train/2-w": 1.8359770774841309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.208230495452881, "rewards_train/margins_1": 1.94105863571167, "rewards_train/margins_2": 1.6171259880065918, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -135.30361938476562, "logps_train/policy_1_l": -208.66725158691406, "logps_train/policy_1_w": -154.9137420654297, "logps_train/policy_2_2": -98.6539535522461, "logps_train/policy_2_w": -200.5204620361328, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.64424729347229, "rewards_train/1-l": -2.6768815517425537, "rewards_train/1-w": 3.082063674926758, "rewards_train/2-2": 2.8053078651428223, "rewards_train/2-w": 1.2120167016983032, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.7589452266693115, "rewards_train/margins_1": 1.4378163814544678, "rewards_train/margins_2": 1.593291163444519, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -240.81112670898438, "logps_train/policy_1_l": -149.70062255859375, "logps_train/policy_1_w": -122.01773071289062, "logps_train/policy_2_2": -176.19094848632812, "logps_train/policy_2_w": -169.4910888671875, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.25926727056503296, "rewards_train/1-l": -1.8810009956359863, "rewards_train/1-w": 3.158578395843506, "rewards_train/2-2": 3.3568830490112305, "rewards_train/2-w": 1.2208120822906494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.039579391479492, "rewards_train/margins_1": 2.899311125278473, "rewards_train/margins_2": 2.136070966720581, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -146.13580322265625, "logps_train/policy_1_l": -218.36395263671875, "logps_train/policy_1_w": -121.15179443359375, "logps_train/policy_2_2": -109.27302551269531, "logps_train/policy_2_w": -165.98158264160156, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.839545726776123, "rewards_train/1-l": -2.873894691467285, "rewards_train/1-w": 3.05747652053833, "rewards_train/2-2": 3.0695722103118896, "rewards_train/2-w": 1.5768420696258545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.931371212005615, "rewards_train/margins_1": 1.217930793762207, "rewards_train/margins_2": 1.4927301406860352, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -90.92546844482422, "logps_train/policy_1_l": -44.12348937988281, "logps_train/policy_1_w": -97.39170837402344, "logps_train/policy_2_2": -64.69576263427734, "logps_train/policy_2_w": -133.8257293701172, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -32.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.7262030839920044, "rewards_train/1-l": -1.1492629051208496, "rewards_train/1-w": 3.8217666149139404, "rewards_train/2-2": 1.7456581592559814, "rewards_train/2-w": 1.7682079076766968, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.97102952003479, "rewards_train/margins_1": 3.095563530921936, "rewards_train/margins_2": -0.022549748420715332, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -119.21197509765625, "logps_train/policy_1_l": -169.97994995117188, "logps_train/policy_1_w": -146.83596801757812, "logps_train/policy_2_2": -93.00514221191406, "logps_train/policy_2_w": -190.7022705078125, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.733490228652954, "rewards_train/1-l": -2.4653775691986084, "rewards_train/1-w": 3.8445286750793457, "rewards_train/2-2": 2.5791735649108887, "rewards_train/2-w": 1.407899022102356, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.309906244277954, "rewards_train/margins_1": 2.1110384464263916, "rewards_train/margins_2": 1.1712745428085327, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -57.109580993652344, "logps_train/policy_1_l": -36.59564208984375, "logps_train/policy_1_w": -37.78352737426758, "logps_train/policy_2_2": -27.54277229309082, "logps_train/policy_2_w": -65.75336456298828, "logps_train/ref_1_2": -64.5, "logps_train/ref_1_l": -27.25, "logps_train/ref_1_w": -56.25, "logps_train/ref_2_2": -44.5, "logps_train/ref_2_w": -71.5, "rewards_train/1-2": 0.7519324421882629, "rewards_train/1-l": -0.928655743598938, "rewards_train/1-w": 1.8568034172058105, "rewards_train/2-2": 1.69064462184906, "rewards_train/2-w": 0.5832570791244507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7854591608047485, "rewards_train/margins_1": 1.1048709750175476, "rewards_train/margins_2": 1.1073875427246094, "step": 417 }, { "epoch": 1.25, "learning_rate": 1.6993253653429064e-06, "loss": 0.4592, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -205.9736328125, "logps_train/policy_1_l": -88.90886688232422, "logps_train/policy_1_w": -60.885406494140625, "logps_train/policy_2_2": -147.05897521972656, "logps_train/policy_2_w": -107.77758026123047, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.04401257634162903, "rewards_train/1-l": -0.8082514405250549, "rewards_train/1-w": 2.598928928375244, "rewards_train/2-2": 2.3942859172821045, "rewards_train/2-w": 1.273597002029419, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.407180368900299, "rewards_train/margins_1": 2.554916352033615, "rewards_train/margins_2": 1.1206889152526855, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -175.5948486328125, "logps_train/policy_1_l": -158.1411895751953, "logps_train/policy_1_w": -120.25994873046875, "logps_train/policy_2_2": -135.7015838623047, "logps_train/policy_2_w": -158.18605041503906, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.795203447341919, "rewards_train/1-l": -1.8774001598358154, "rewards_train/1-w": 3.352130174636841, "rewards_train/2-2": 3.6767163276672363, "rewards_train/2-w": 1.8501455783843994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.229530334472656, "rewards_train/margins_1": 1.5569267272949219, "rewards_train/margins_2": 1.826570749282837, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -197.31024169921875, "logps_train/policy_1_l": -193.02224731445312, "logps_train/policy_1_w": -177.4995880126953, "logps_train/policy_2_2": -161.7628173828125, "logps_train/policy_2_w": -218.40084838867188, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": 2.190851926803589, "rewards_train/1-l": -2.524880886077881, "rewards_train/1-w": 3.662540912628174, "rewards_train/2-2": 3.573718309402466, "rewards_train/2-w": 1.7974154949188232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.187421798706055, "rewards_train/margins_1": 1.471688985824585, "rewards_train/margins_2": 1.7763028144836426, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -112.05128479003906, "logps_train/policy_1_l": -144.6385955810547, "logps_train/policy_1_w": -50.62684631347656, "logps_train/policy_2_2": -89.38184356689453, "logps_train/policy_2_w": -71.46469116210938, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 1.4233872890472412, "rewards_train/1-l": -1.7996017932891846, "rewards_train/1-w": 2.0471792221069336, "rewards_train/2-2": 2.420604705810547, "rewards_train/2-w": 1.189517617225647, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.846781015396118, "rewards_train/margins_1": 0.6237919330596924, "rewards_train/margins_2": 1.2310870885849, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -161.84571838378906, "logps_train/policy_1_l": -190.2035369873047, "logps_train/policy_1_w": -139.1221923828125, "logps_train/policy_2_2": -116.77201080322266, "logps_train/policy_2_w": -192.3815460205078, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.1873027086257935, "rewards_train/1-l": -2.6147878170013428, "rewards_train/1-w": 3.6715688705444336, "rewards_train/2-2": 2.8798303604125977, "rewards_train/2-w": 1.2800097465515137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.286356687545776, "rewards_train/margins_1": 2.48426616191864, "rewards_train/margins_2": 1.599820613861084, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -94.00321960449219, "logps_train/policy_1_l": -169.9146728515625, "logps_train/policy_1_w": -102.79953002929688, "logps_train/policy_2_2": -66.83910369873047, "logps_train/policy_2_w": -135.2732696533203, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.7899119257926941, "rewards_train/1-l": -2.3367793560028076, "rewards_train/1-w": 2.521609306335449, "rewards_train/2-2": 1.9785895347595215, "rewards_train/2-w": 0.7632982730865479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.858388662338257, "rewards_train/margins_1": 1.7316973805427551, "rewards_train/margins_2": 1.2152912616729736, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -186.4958038330078, "logps_train/policy_1_l": -218.09823608398438, "logps_train/policy_1_w": -143.81710815429688, "logps_train/policy_2_2": -142.39271545410156, "logps_train/policy_2_w": -176.206298828125, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7785439491271973, "rewards_train/1-l": -2.9535727500915527, "rewards_train/1-w": 4.4495391845703125, "rewards_train/2-2": 3.1310410499572754, "rewards_train/2-w": 3.44187068939209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.403111934661865, "rewards_train/margins_1": 2.6709952354431152, "rewards_train/margins_2": -0.31082963943481445, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -89.594970703125, "logps_train/policy_1_l": -122.39952087402344, "logps_train/policy_1_w": -110.7422866821289, "logps_train/policy_2_2": -67.51307678222656, "logps_train/policy_2_w": -144.99459838867188, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.8865969181060791, "rewards_train/1-l": -1.8551864624023438, "rewards_train/1-w": 2.216787099838257, "rewards_train/2-2": 1.6658800840377808, "rewards_train/2-w": 0.6966341137886047, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.071973562240601, "rewards_train/margins_1": 1.3301901817321777, "rewards_train/margins_2": 0.969245970249176, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -80.31877136230469, "logps_train/policy_1_l": -105.695068359375, "logps_train/policy_1_w": -79.12310028076172, "logps_train/policy_2_2": -63.175846099853516, "logps_train/policy_2_w": -110.04054260253906, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -79.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.8985916376113892, "rewards_train/1-l": -1.5304439067840576, "rewards_train/1-w": 2.1033148765563965, "rewards_train/2-2": 1.625383973121643, "rewards_train/2-w": 0.7619606256484985, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.633758783340454, "rewards_train/margins_1": 1.2047232389450073, "rewards_train/margins_2": 0.8634233474731445, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -132.845703125, "logps_train/policy_1_l": -84.45211029052734, "logps_train/policy_1_w": -50.941375732421875, "logps_train/policy_2_2": -107.67424774169922, "logps_train/policy_2_w": -85.14082336425781, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -62.25, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 0.8525390028953552, "rewards_train/1-l": -2.221773147583008, "rewards_train/1-w": 2.7136752605438232, "rewards_train/2-2": 1.832965612411499, "rewards_train/2-w": 1.4554487466812134, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.935448408126831, "rewards_train/margins_1": 1.861136257648468, "rewards_train/margins_2": 0.37751686573028564, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -249.83352661132812, "logps_train/policy_1_l": -220.96566772460938, "logps_train/policy_1_w": -140.92062377929688, "logps_train/policy_2_2": -173.71975708007812, "logps_train/policy_2_w": -206.17819213867188, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.4353974461555481, "rewards_train/1-l": -3.5778167247772217, "rewards_train/1-w": 3.470438241958618, "rewards_train/2-2": 3.07021164894104, "rewards_train/2-w": 0.6665554046630859, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.04825496673584, "rewards_train/margins_1": 3.03504079580307, "rewards_train/margins_2": 2.403656244277954, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -290.576171875, "logps_train/policy_1_l": -239.30874633789062, "logps_train/policy_1_w": -161.9471435546875, "logps_train/policy_2_2": -244.8723907470703, "logps_train/policy_2_w": -196.075927734375, "logps_train/ref_1_2": -328.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -308.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 3.8455092906951904, "rewards_train/1-l": -2.4586095809936523, "rewards_train/1-w": 3.7818477153778076, "rewards_train/2-2": 6.202605724334717, "rewards_train/2-w": 2.8010005950927734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.24045729637146, "rewards_train/margins_1": -0.06366157531738281, "rewards_train/margins_2": 3.4016051292419434, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -163.09872436523438, "logps_train/policy_1_l": -182.3463134765625, "logps_train/policy_1_w": -100.92644500732422, "logps_train/policy_2_2": -126.78262329101562, "logps_train/policy_2_w": -136.78311157226562, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 2.409268856048584, "rewards_train/1-l": -2.594982385635376, "rewards_train/1-w": 2.7985665798187256, "rewards_train/2-2": 4.3201751708984375, "rewards_train/2-w": 1.6255954504013062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.393548965454102, "rewards_train/margins_1": 0.3892977237701416, "rewards_train/margins_2": 2.6945797204971313, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -172.05523681640625, "logps_train/policy_1_l": -148.91506958007812, "logps_train/policy_1_w": -94.97177124023438, "logps_train/policy_2_2": -125.1203384399414, "logps_train/policy_2_w": -120.72732543945312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.8772895336151123, "rewards_train/1-l": -2.600687026977539, "rewards_train/1-w": 3.066103935241699, "rewards_train/2-2": 3.1176538467407227, "rewards_train/2-w": 1.7655494213104248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.666790962219238, "rewards_train/margins_1": 2.188814401626587, "rewards_train/margins_2": 1.3521044254302979, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -134.62901306152344, "logps_train/policy_1_l": -124.49568176269531, "logps_train/policy_1_w": -81.20189666748047, "logps_train/policy_2_2": -109.65339660644531, "logps_train/policy_2_w": -103.29374694824219, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 2.0949110984802246, "rewards_train/1-l": -2.469294548034668, "rewards_train/1-w": 3.0313730239868164, "rewards_train/2-2": 3.0565359592437744, "rewards_train/2-w": 2.2175002098083496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.500667572021484, "rewards_train/margins_1": 0.9364619255065918, "rewards_train/margins_2": 0.8390357494354248, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -167.7572021484375, "logps_train/policy_1_l": -146.283203125, "logps_train/policy_1_w": -119.76002502441406, "logps_train/policy_2_2": -110.34423828125, "logps_train/policy_2_w": -169.88555908203125, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.9649045467376709, "rewards_train/1-l": -1.8033207654953003, "rewards_train/1-w": 3.166184663772583, "rewards_train/2-2": 2.9624505043029785, "rewards_train/2-w": 1.2536308765411377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.969505429267883, "rewards_train/margins_1": 2.201280117034912, "rewards_train/margins_2": 1.7088196277618408, "step": 419 }, { "epoch": 1.26, "learning_rate": 1.6759676993274805e-06, "loss": 0.4932, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -209.27679443359375, "logps_train/policy_1_l": -177.5067138671875, "logps_train/policy_1_w": -159.32891845703125, "logps_train/policy_2_2": -169.1751708984375, "logps_train/policy_2_w": -194.01336669921875, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.8535710573196411, "rewards_train/1-l": -2.12078857421875, "rewards_train/1-w": 3.5655465126037598, "rewards_train/2-2": 3.3856077194213867, "rewards_train/2-w": 2.0767886638641357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.68633508682251, "rewards_train/margins_1": 1.7119754552841187, "rewards_train/margins_2": 1.308819055557251, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -121.71842193603516, "logps_train/policy_1_l": -138.57431030273438, "logps_train/policy_1_w": -99.7251205444336, "logps_train/policy_2_2": -97.08723449707031, "logps_train/policy_2_w": -145.21218872070312, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.2562832832336426, "rewards_train/1-l": -2.1289162635803223, "rewards_train/1-w": 3.2122535705566406, "rewards_train/2-2": 2.414616584777832, "rewards_train/2-w": 1.0740931034088135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.341169834136963, "rewards_train/margins_1": 1.955970287322998, "rewards_train/margins_2": 1.3405234813690186, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -79.78059387207031, "logps_train/policy_1_l": -125.63331604003906, "logps_train/policy_1_w": -75.05203247070312, "logps_train/policy_2_2": -56.28214645385742, "logps_train/policy_2_w": -110.25653076171875, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.0867849588394165, "rewards_train/1-l": -2.5836448669433594, "rewards_train/1-w": 2.542452812194824, "rewards_train/2-2": 1.9522541761398315, "rewards_train/2-w": 1.0259085893630981, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.126097679138184, "rewards_train/margins_1": 1.4556678533554077, "rewards_train/margins_2": 0.9263455867767334, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -118.52210235595703, "logps_train/policy_1_l": -129.9161834716797, "logps_train/policy_1_w": -49.16868591308594, "logps_train/policy_2_2": -85.17228698730469, "logps_train/policy_2_w": -78.08180236816406, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 0.5614610910415649, "rewards_train/1-l": -2.1836094856262207, "rewards_train/1-w": 1.9469985961914062, "rewards_train/2-2": 1.9718338251113892, "rewards_train/2-w": 1.0191640853881836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.130608081817627, "rewards_train/margins_1": 1.3855375051498413, "rewards_train/margins_2": 0.9526697397232056, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -103.18788146972656, "logps_train/policy_1_l": -133.47415161132812, "logps_train/policy_1_w": -107.9349136352539, "logps_train/policy_2_2": -75.40350341796875, "logps_train/policy_2_w": -147.47402954101562, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.1380479335784912, "rewards_train/1-l": -2.255033016204834, "rewards_train/1-w": 3.481508493423462, "rewards_train/2-2": 2.2807435989379883, "rewards_train/2-w": 1.879941701889038, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.736541509628296, "rewards_train/margins_1": 2.3434605598449707, "rewards_train/margins_2": 0.4008018970489502, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -76.17320251464844, "logps_train/policy_1_l": -88.77367401123047, "logps_train/policy_1_w": -99.2012939453125, "logps_train/policy_2_2": -56.6026496887207, "logps_train/policy_2_w": -130.4098358154297, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9955707788467407, "rewards_train/1-l": -1.2503652572631836, "rewards_train/1-w": 2.4167840480804443, "rewards_train/2-2": 1.7272348403930664, "rewards_train/2-w": 1.4056965112686157, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.667149305343628, "rewards_train/margins_1": 1.4212132692337036, "rewards_train/margins_2": 0.3215383291244507, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -260.75067138671875, "logps_train/policy_1_l": -276.2852478027344, "logps_train/policy_1_w": -169.568115234375, "logps_train/policy_2_2": -213.99557495117188, "logps_train/policy_2_w": -210.40570068359375, "logps_train/ref_1_2": -284.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.3749313354492188, "rewards_train/1-l": -2.8628997802734375, "rewards_train/1-w": 3.927563190460205, "rewards_train/2-2": 3.869192123413086, "rewards_train/2-w": 2.684429407119751, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.790462970733643, "rewards_train/margins_1": 1.5526318550109863, "rewards_train/margins_2": 1.184762716293335, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -180.189697265625, "logps_train/policy_1_l": -142.55459594726562, "logps_train/policy_1_w": -103.8993148803711, "logps_train/policy_2_2": -127.70034790039062, "logps_train/policy_2_w": -160.80242919921875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 0.9747808575630188, "rewards_train/1-l": -2.1705965995788574, "rewards_train/1-w": 3.5272562503814697, "rewards_train/2-2": 2.9213719367980957, "rewards_train/2-w": 1.21663236618042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.697852849960327, "rewards_train/margins_1": 2.552475392818451, "rewards_train/margins_2": 1.7047395706176758, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -146.9279327392578, "logps_train/policy_1_l": -158.45867919921875, "logps_train/policy_1_w": -128.9041748046875, "logps_train/policy_2_2": -103.2650375366211, "logps_train/policy_2_w": -162.6111297607422, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.9648237824440002, "rewards_train/1-l": -1.8747248649597168, "rewards_train/1-w": 3.21197509765625, "rewards_train/2-2": 2.775449275970459, "rewards_train/2-w": 1.6617882251739502, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.086699962615967, "rewards_train/margins_1": 2.2471513152122498, "rewards_train/margins_2": 1.1136610507965088, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -106.32304382324219, "logps_train/policy_1_l": -154.33999633789062, "logps_train/policy_1_w": -179.82557678222656, "logps_train/policy_2_2": -90.31261444091797, "logps_train/policy_2_w": -203.45115661621094, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.8391799926757812, "rewards_train/1-l": -2.0152499675750732, "rewards_train/1-w": 2.4143178462982178, "rewards_train/2-2": 2.538269519805908, "rewards_train/2-w": 1.5017600059509277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.429567813873291, "rewards_train/margins_1": 0.5751378536224365, "rewards_train/margins_2": 1.0365095138549805, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -143.426513671875, "logps_train/policy_1_l": -162.30844116210938, "logps_train/policy_1_w": -124.62288665771484, "logps_train/policy_2_2": -113.99906921386719, "logps_train/policy_2_w": -158.98350524902344, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.071410894393921, "rewards_train/1-l": -1.7214689254760742, "rewards_train/1-w": 3.308804988861084, "rewards_train/2-2": 3.0000929832458496, "rewards_train/2-w": 1.9297740459442139, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.030273914337158, "rewards_train/margins_1": 1.237394094467163, "rewards_train/margins_2": 1.0703189373016357, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -211.0606231689453, "logps_train/policy_1_l": -161.42799377441406, "logps_train/policy_1_w": -129.51531982421875, "logps_train/policy_2_2": -160.188232421875, "logps_train/policy_2_w": -180.33203125, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.8845629692077637, "rewards_train/1-l": -1.7799079418182373, "rewards_train/1-w": 3.6234683990478516, "rewards_train/2-2": 4.088208198547363, "rewards_train/2-w": 1.7480461597442627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.403376340866089, "rewards_train/margins_1": 1.738905429840088, "rewards_train/margins_2": 2.3401620388031006, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -224.7019500732422, "logps_train/policy_1_l": -210.91009521484375, "logps_train/policy_1_w": -137.0233154296875, "logps_train/policy_2_2": -174.1022186279297, "logps_train/policy_2_w": -184.17745971679688, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.12316420674324036, "rewards_train/1-l": -2.2824153900146484, "rewards_train/1-w": 3.1332151889801025, "rewards_train/2-2": 2.0050125122070312, "rewards_train/2-w": 1.636160135269165, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.415630578994751, "rewards_train/margins_1": 3.010050982236862, "rewards_train/margins_2": 0.3688523769378662, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -247.49136352539062, "logps_train/policy_1_l": -240.48260498046875, "logps_train/policy_1_w": -115.27313995361328, "logps_train/policy_2_2": -195.99905395507812, "logps_train/policy_2_w": -156.18429565429688, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.7680501937866211, "rewards_train/1-l": -2.8222830295562744, "rewards_train/1-w": 3.4107723236083984, "rewards_train/2-2": 3.607908010482788, "rewards_train/2-w": 2.0444610118865967, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.233055353164673, "rewards_train/margins_1": 2.6427221298217773, "rewards_train/margins_2": 1.5634469985961914, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -189.82049560546875, "logps_train/policy_1_l": -145.6305694580078, "logps_train/policy_1_w": -113.54651641845703, "logps_train/policy_2_2": -154.9004364013672, "logps_train/policy_2_w": -144.807373046875, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.1202940940856934, "rewards_train/1-l": -1.5720419883728027, "rewards_train/1-w": 2.8812856674194336, "rewards_train/2-2": 2.7494101524353027, "rewards_train/2-w": 1.5723869800567627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.453327655792236, "rewards_train/margins_1": 1.7609915733337402, "rewards_train/margins_2": 1.17702317237854, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -215.763427734375, "logps_train/policy_1_l": -172.6822509765625, "logps_train/policy_1_w": -105.50294494628906, "logps_train/policy_2_2": -160.44122314453125, "logps_train/policy_2_w": -136.40972900390625, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.0736569166183472, "rewards_train/1-l": -3.358849048614502, "rewards_train/1-w": 2.1928696632385254, "rewards_train/2-2": 3.5855660438537598, "rewards_train/2-w": 1.289886713027954, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.551718711853027, "rewards_train/margins_1": 1.1192127466201782, "rewards_train/margins_2": 2.2956793308258057, "step": 421 }, { "epoch": 1.26, "learning_rate": 1.6526904574119213e-06, "loss": 0.4442, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -182.26217651367188, "logps_train/policy_1_l": -144.73194885253906, "logps_train/policy_1_w": -110.99878692626953, "logps_train/policy_2_2": -137.01531982421875, "logps_train/policy_2_w": -148.2737579345703, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.5300321578979492, "rewards_train/1-l": -2.554445266723633, "rewards_train/1-w": 3.2313716411590576, "rewards_train/2-2": 3.1859683990478516, "rewards_train/2-w": 1.4702800512313843, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.78581690788269, "rewards_train/margins_1": 1.7013394832611084, "rewards_train/margins_2": 1.7156883478164673, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -139.28256225585938, "logps_train/policy_1_l": -125.82688903808594, "logps_train/policy_1_w": -53.3324089050293, "logps_train/policy_2_2": -94.3667221069336, "logps_train/policy_2_w": -78.7252197265625, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.5412755012512207, "rewards_train/1-l": -2.398313522338867, "rewards_train/1-w": 1.7839465141296387, "rewards_train/2-2": 2.399265766143799, "rewards_train/2-w": 1.0313844680786133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.182260036468506, "rewards_train/margins_1": 1.242671012878418, "rewards_train/margins_2": 1.3678812980651855, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -92.5892105102539, "logps_train/policy_1_l": -97.00215148925781, "logps_train/policy_1_w": -91.43524169921875, "logps_train/policy_2_2": -70.72514343261719, "logps_train/policy_2_w": -119.49589538574219, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.369985580444336, "rewards_train/1-l": -1.676142930984497, "rewards_train/1-w": 2.698516368865967, "rewards_train/2-2": 2.107954502105713, "rewards_train/2-w": 1.3691601753234863, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.374659299850464, "rewards_train/margins_1": 1.3285307884216309, "rewards_train/margins_2": 0.7387943267822266, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -230.14999389648438, "logps_train/policy_1_l": -195.35858154296875, "logps_train/policy_1_w": -181.23031616210938, "logps_train/policy_2_2": -177.81948852539062, "logps_train/policy_2_w": -238.71693420410156, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.3975014686584473, "rewards_train/1-l": -2.3092942237854004, "rewards_train/1-w": 4.2519683837890625, "rewards_train/2-2": 4.518050193786621, "rewards_train/2-w": 2.2533071041107178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.561262607574463, "rewards_train/margins_1": 1.8544669151306152, "rewards_train/margins_2": 2.2647430896759033, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -216.43472290039062, "logps_train/policy_1_l": -241.21124267578125, "logps_train/policy_1_w": -140.05136108398438, "logps_train/policy_2_2": -171.1961669921875, "logps_train/policy_2_w": -191.7134246826172, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.5815269947052002, "rewards_train/1-l": -2.3375511169433594, "rewards_train/1-w": 3.371427536010742, "rewards_train/2-2": 3.252257823944092, "rewards_train/2-w": 1.8036575317382812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.708978652954102, "rewards_train/margins_1": 1.789900541305542, "rewards_train/margins_2": 1.4486002922058105, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -121.12461853027344, "logps_train/policy_1_l": -86.80104064941406, "logps_train/policy_1_w": -74.61991119384766, "logps_train/policy_2_2": -85.59848022460938, "logps_train/policy_2_w": -110.51411437988281, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.6289435625076294, "rewards_train/1-l": -1.642311453819275, "rewards_train/1-w": 2.301680326461792, "rewards_train/2-2": 2.105581760406494, "rewards_train/2-w": 1.0845268964767456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.943991780281067, "rewards_train/margins_1": 1.6727367639541626, "rewards_train/margins_2": 1.0210548639297485, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -151.631591796875, "logps_train/policy_1_l": -157.99154663085938, "logps_train/policy_1_w": -82.98703002929688, "logps_train/policy_2_2": -123.8189697265625, "logps_train/policy_2_w": -114.89750671386719, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.5934820175170898, "rewards_train/1-l": -2.6670260429382324, "rewards_train/1-w": 2.8993444442749023, "rewards_train/2-2": 2.7110719680786133, "rewards_train/2-w": 1.694624900817871, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.566370487213135, "rewards_train/margins_1": 1.3058624267578125, "rewards_train/margins_2": 1.0164470672607422, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -148.0455322265625, "logps_train/policy_1_l": -148.48745727539062, "logps_train/policy_1_w": -236.9000701904297, "logps_train/policy_2_2": -123.3973159790039, "logps_train/policy_2_w": -282.0140380859375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -266.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 2.78607177734375, "rewards_train/1-l": -1.6700356006622314, "rewards_train/1-w": 2.8678059577941895, "rewards_train/2-2": 3.390737533569336, "rewards_train/2-w": 0.7552372813224792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.537841558456421, "rewards_train/margins_1": 0.08173418045043945, "rewards_train/margins_2": 2.6355002522468567, "step": 422 }, { "epoch": 1.27, "logps_train/policy_1_2": -207.70953369140625, "logps_train/policy_1_l": -163.35035705566406, "logps_train/policy_1_w": -144.4237060546875, "logps_train/policy_2_2": -154.66676330566406, "logps_train/policy_2_w": -188.23379516601562, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.2477960586547852, "rewards_train/1-l": -1.4946072101593018, "rewards_train/1-w": 3.39444637298584, "rewards_train/2-2": 3.479417324066162, "rewards_train/2-w": 1.9400969743728638, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.889053583145142, "rewards_train/margins_1": 2.1466503143310547, "rewards_train/margins_2": 1.5393203496932983, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -158.99557495117188, "logps_train/policy_1_l": -166.39700317382812, "logps_train/policy_1_w": -149.09300231933594, "logps_train/policy_2_2": -115.50115203857422, "logps_train/policy_2_w": -194.78350830078125, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.808254599571228, "rewards_train/1-l": -2.501420497894287, "rewards_train/1-w": 3.9821066856384277, "rewards_train/2-2": 3.3573062419891357, "rewards_train/2-w": 2.0435245037078857, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.483527183532715, "rewards_train/margins_1": 2.1738520860671997, "rewards_train/margins_2": 1.31378173828125, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -111.8803482055664, "logps_train/policy_1_l": -108.67709350585938, "logps_train/policy_1_w": -88.07849884033203, "logps_train/policy_2_2": -80.72285461425781, "logps_train/policy_2_w": -115.51024627685547, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.3928248882293701, "rewards_train/1-l": -1.6919771432876587, "rewards_train/1-w": 2.6659786701202393, "rewards_train/2-2": 2.3668744564056396, "rewards_train/2-w": 1.8353034257888794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.357955813407898, "rewards_train/margins_1": 1.2731537818908691, "rewards_train/margins_2": 0.5315710306167603, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -194.10610961914062, "logps_train/policy_1_l": -152.88095092773438, "logps_train/policy_1_w": -98.91606140136719, "logps_train/policy_2_2": -170.49957275390625, "logps_train/policy_2_w": -121.92994689941406, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.578452706336975, "rewards_train/1-l": -1.6130942106246948, "rewards_train/1-w": 2.658393621444702, "rewards_train/2-2": 2.7016043663024902, "rewards_train/2-w": 1.8835681676864624, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.271487832069397, "rewards_train/margins_1": 1.079940915107727, "rewards_train/margins_2": 0.8180361986160278, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -142.93402099609375, "logps_train/policy_1_l": -182.07215881347656, "logps_train/policy_1_w": -130.30999755859375, "logps_train/policy_2_2": -114.14055633544922, "logps_train/policy_2_w": -153.51036071777344, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.1550354957580566, "rewards_train/1-l": -1.397841215133667, "rewards_train/1-w": 3.1955618858337402, "rewards_train/2-2": 2.9601635932922363, "rewards_train/2-w": 2.398963451385498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.593403100967407, "rewards_train/margins_1": 1.0405263900756836, "rewards_train/margins_2": 0.5612001419067383, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -130.0147705078125, "logps_train/policy_1_l": -198.42449951171875, "logps_train/policy_1_w": -125.3704833984375, "logps_train/policy_2_2": -98.73005676269531, "logps_train/policy_2_w": -163.70706176757812, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.6633657217025757, "rewards_train/1-l": -2.3596386909484863, "rewards_train/1-w": 3.637951374053955, "rewards_train/2-2": 2.4748458862304688, "rewards_train/2-w": 2.1886680126190186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.997590065002441, "rewards_train/margins_1": 1.9745856523513794, "rewards_train/margins_2": 0.2861778736114502, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -96.02102661132812, "logps_train/policy_1_l": -130.86917114257812, "logps_train/policy_1_w": -100.99674987792969, "logps_train/policy_2_2": -74.53889465332031, "logps_train/policy_2_w": -136.0550994873047, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3947722911834717, "rewards_train/1-l": -2.4732933044433594, "rewards_train/1-w": 2.9620437622070312, "rewards_train/2-2": 2.214860439300537, "rewards_train/2-w": 1.382771611213684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.435337066650391, "rewards_train/margins_1": 1.5672714710235596, "rewards_train/margins_2": 0.832088828086853, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -122.04151916503906, "logps_train/policy_1_l": -247.58074951171875, "logps_train/policy_1_w": -108.78596496582031, "logps_train/policy_2_2": -102.05845642089844, "logps_train/policy_2_w": -137.180908203125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.9458481073379517, "rewards_train/1-l": -3.3268251419067383, "rewards_train/1-w": 2.9159345626831055, "rewards_train/2-2": 2.795717239379883, "rewards_train/2-w": 2.0490965843200684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.242759704589844, "rewards_train/margins_1": 0.9700864553451538, "rewards_train/margins_2": 0.7466206550598145, "step": 423 }, { "epoch": 1.27, "learning_rate": 1.6294959114140036e-06, "loss": 0.4492, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -141.85897827148438, "logps_train/policy_1_l": -142.89801025390625, "logps_train/policy_1_w": -93.17005920410156, "logps_train/policy_2_2": -120.27330780029297, "logps_train/policy_2_w": -116.4124984741211, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.8492575883865356, "rewards_train/1-l": -1.6116769313812256, "rewards_train/1-w": 3.098618507385254, "rewards_train/2-2": 2.5507941246032715, "rewards_train/2-w": 2.057187557220459, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.7102954387664795, "rewards_train/margins_1": 1.2493609189987183, "rewards_train/margins_2": 0.4936065673828125, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -200.56430053710938, "logps_train/policy_1_l": -217.22848510742188, "logps_train/policy_1_w": -159.53573608398438, "logps_train/policy_2_2": -161.93618774414062, "logps_train/policy_2_w": -195.6907196044922, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.5263813734054565, "rewards_train/1-l": -1.875777006149292, "rewards_train/1-w": 3.8370521068573, "rewards_train/2-2": 3.0845065116882324, "rewards_train/2-w": 2.2715530395507812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.712829113006592, "rewards_train/margins_1": 2.3106707334518433, "rewards_train/margins_2": 0.8129534721374512, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -174.04698181152344, "logps_train/policy_1_l": -214.41598510742188, "logps_train/policy_1_w": -199.39068603515625, "logps_train/policy_2_2": -132.22845458984375, "logps_train/policy_2_w": -247.3097686767578, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.5367082357406616, "rewards_train/1-l": -2.1628379821777344, "rewards_train/1-w": 3.3453054428100586, "rewards_train/2-2": 3.280670166015625, "rewards_train/2-w": 1.106522560119629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.508143424987793, "rewards_train/margins_1": 1.808597207069397, "rewards_train/margins_2": 2.174147605895996, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -97.8413314819336, "logps_train/policy_1_l": -95.72785949707031, "logps_train/policy_1_w": -114.74502563476562, "logps_train/policy_2_2": -71.0323257446289, "logps_train/policy_2_w": -146.46661376953125, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.3922340869903564, "rewards_train/1-l": -1.4949051141738892, "rewards_train/1-w": 2.3815524578094482, "rewards_train/2-2": 2.3222556114196777, "rewards_train/2-w": 0.9046080112457275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8764575719833374, "rewards_train/margins_1": 0.9893183708190918, "rewards_train/margins_2": 1.4176476001739502, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -196.19667053222656, "logps_train/policy_1_l": -97.29026794433594, "logps_train/policy_1_w": -143.8018798828125, "logps_train/policy_2_2": -154.88442993164062, "logps_train/policy_2_w": -179.14431762695312, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.144395351409912, "rewards_train/1-l": -1.6895732879638672, "rewards_train/1-w": 3.3416876792907715, "rewards_train/2-2": 2.9137063026428223, "rewards_train/2-w": 1.7418179512023926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.031260967254639, "rewards_train/margins_1": 2.1972923278808594, "rewards_train/margins_2": 1.1718883514404297, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -162.35279846191406, "logps_train/policy_1_l": -171.45529174804688, "logps_train/policy_1_w": -123.08961486816406, "logps_train/policy_2_2": -126.26219177246094, "logps_train/policy_2_w": -154.53045654296875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.8608127236366272, "rewards_train/1-l": -2.554903507232666, "rewards_train/1-w": 2.5136945247650146, "rewards_train/2-2": 2.4931414127349854, "rewards_train/2-w": 1.096954107284546, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.068598031997681, "rewards_train/margins_1": 1.6528818011283875, "rewards_train/margins_2": 1.3961873054504395, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -170.1919403076172, "logps_train/policy_1_l": -155.148681640625, "logps_train/policy_1_w": -176.84329223632812, "logps_train/policy_2_2": -130.91555786132812, "logps_train/policy_2_w": -226.25839233398438, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.1901803016662598, "rewards_train/1-l": -0.937036395072937, "rewards_train/1-w": 4.318795680999756, "rewards_train/2-2": 3.4232888221740723, "rewards_train/2-w": 1.982755422592163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.255832076072693, "rewards_train/margins_1": 2.128615379333496, "rewards_train/margins_2": 1.4405333995819092, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -67.49205017089844, "logps_train/policy_1_l": -90.22930908203125, "logps_train/policy_1_w": -84.87875366210938, "logps_train/policy_2_2": -53.736915588378906, "logps_train/policy_2_w": -111.75558471679688, "logps_train/ref_1_2": -84.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.6447398662567139, "rewards_train/1-l": -2.479767084121704, "rewards_train/1-w": 3.3906402587890625, "rewards_train/2-2": 2.1676716804504395, "rewards_train/2-w": 2.202566623687744, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.870407342910767, "rewards_train/margins_1": 1.7459003925323486, "rewards_train/margins_2": -0.03489494323730469, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -97.95266723632812, "logps_train/policy_1_l": -114.99559020996094, "logps_train/policy_1_w": -51.689170837402344, "logps_train/policy_2_2": -68.99627685546875, "logps_train/policy_2_w": -82.98191833496094, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 1.3937958478927612, "rewards_train/1-l": -1.9962272644042969, "rewards_train/1-w": 2.6326451301574707, "rewards_train/2-2": 2.54880952835083, "rewards_train/2-w": 1.4424325227737427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.628872394561768, "rewards_train/margins_1": 1.2388492822647095, "rewards_train/margins_2": 1.1063770055770874, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -152.17665100097656, "logps_train/policy_1_l": -131.05116271972656, "logps_train/policy_1_w": -79.09893798828125, "logps_train/policy_2_2": -125.02212524414062, "logps_train/policy_2_w": -103.84616088867188, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.1612404584884644, "rewards_train/1-l": -1.9652729034423828, "rewards_train/1-w": 2.51979398727417, "rewards_train/2-2": 2.2524752616882324, "rewards_train/2-w": 1.6028835773468018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.485066890716553, "rewards_train/margins_1": 1.3585535287857056, "rewards_train/margins_2": 0.6495916843414307, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -216.22909545898438, "logps_train/policy_1_l": -212.89007568359375, "logps_train/policy_1_w": -131.42529296875, "logps_train/policy_2_2": -164.5853271484375, "logps_train/policy_2_w": -174.2083282470703, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.161466121673584, "rewards_train/1-l": -2.0681099891662598, "rewards_train/1-w": 3.4199705123901367, "rewards_train/2-2": 4.285218715667725, "rewards_train/2-w": 1.672917366027832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4880805015563965, "rewards_train/margins_1": 1.2585043907165527, "rewards_train/margins_2": 2.6123013496398926, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -118.63609313964844, "logps_train/policy_1_l": -187.8123016357422, "logps_train/policy_1_w": -105.62429809570312, "logps_train/policy_2_2": -84.57344055175781, "logps_train/policy_2_w": -147.94546508789062, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.5965466499328613, "rewards_train/1-l": -2.662163019180298, "rewards_train/1-w": 2.805928945541382, "rewards_train/2-2": 2.694218635559082, "rewards_train/2-w": 1.3855316638946533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.46809196472168, "rewards_train/margins_1": 1.2093822956085205, "rewards_train/margins_2": 1.3086869716644287, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -153.19381713867188, "logps_train/policy_1_l": -129.97943115234375, "logps_train/policy_1_w": -112.00960540771484, "logps_train/policy_2_2": -123.56484985351562, "logps_train/policy_2_w": -138.53460693359375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.5868679285049438, "rewards_train/1-l": -1.891302227973938, "rewards_train/1-w": 2.8076329231262207, "rewards_train/2-2": 2.541952610015869, "rewards_train/2-w": 1.6074755191802979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.698935151100159, "rewards_train/margins_1": 1.2207649946212769, "rewards_train/margins_2": 0.9344770908355713, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -124.33028411865234, "logps_train/policy_1_l": -127.2744140625, "logps_train/policy_1_w": -83.8028564453125, "logps_train/policy_2_2": -106.28315734863281, "logps_train/policy_2_w": -103.10430908203125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 2.3988077640533447, "rewards_train/1-l": -2.068848133087158, "rewards_train/1-w": 3.2876839637756348, "rewards_train/2-2": 2.9498095512390137, "rewards_train/2-w": 2.5364437103271484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.356532096862793, "rewards_train/margins_1": 0.88887619972229, "rewards_train/margins_2": 0.41336584091186523, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -132.46710205078125, "logps_train/policy_1_l": -125.71385192871094, "logps_train/policy_1_w": -99.82050323486328, "logps_train/policy_2_2": -95.63848114013672, "logps_train/policy_2_w": -133.8651580810547, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3540712594985962, "rewards_train/1-l": -1.8961894512176514, "rewards_train/1-w": 2.5080864429473877, "rewards_train/2-2": 2.679267406463623, "rewards_train/2-w": 1.5900471210479736, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.404275894165039, "rewards_train/margins_1": 1.1540151834487915, "rewards_train/margins_2": 1.0892202854156494, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -169.29672241210938, "logps_train/policy_1_l": -229.29830932617188, "logps_train/policy_1_w": -142.43362426757812, "logps_train/policy_2_2": -126.07757568359375, "logps_train/policy_2_w": -214.04238891601562, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.6797034740447998, "rewards_train/1-l": -3.815476179122925, "rewards_train/1-w": 4.070895195007324, "rewards_train/2-2": 2.96567964553833, "rewards_train/2-w": 1.7391209602355957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.886371374130249, "rewards_train/margins_1": 2.3911917209625244, "rewards_train/margins_2": 1.2265586853027344, "step": 425 }, { "epoch": 1.28, "learning_rate": 1.6063863250805279e-06, "loss": 0.4421, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -87.9516372680664, "logps_train/policy_1_l": -101.1158218383789, "logps_train/policy_1_w": -74.28431701660156, "logps_train/policy_2_2": -62.44569396972656, "logps_train/policy_2_w": -101.68616485595703, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.1165549755096436, "rewards_train/1-l": -1.6240818500518799, "rewards_train/1-w": 2.041490077972412, "rewards_train/2-2": 2.0575788021087646, "rewards_train/2-w": 0.7431025505065918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.665571928024292, "rewards_train/margins_1": 0.9249351024627686, "rewards_train/margins_2": 1.3144762516021729, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -179.86248779296875, "logps_train/policy_1_l": -227.63363647460938, "logps_train/policy_1_w": -214.5415496826172, "logps_train/policy_2_2": -135.7059783935547, "logps_train/policy_2_w": -276.051513671875, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.134162425994873, "rewards_train/1-l": -1.683797836303711, "rewards_train/1-w": 3.3021926879882812, "rewards_train/2-2": 3.474910259246826, "rewards_train/2-w": 0.7423080801963806, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.985990524291992, "rewards_train/margins_1": 1.1680302619934082, "rewards_train/margins_2": 2.7326021790504456, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -167.5147247314453, "logps_train/policy_1_l": -127.1414566040039, "logps_train/policy_1_w": -142.28488159179688, "logps_train/policy_2_2": -128.27658081054688, "logps_train/policy_2_w": -198.45350646972656, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.5383716821670532, "rewards_train/1-l": -1.8254740238189697, "rewards_train/1-w": 3.0105741024017334, "rewards_train/2-2": 3.024685859680176, "rewards_train/2-w": 0.9515237808227539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.836048126220703, "rewards_train/margins_1": 1.4722024202346802, "rewards_train/margins_2": 2.073162078857422, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -212.9686279296875, "logps_train/policy_1_l": -236.62020874023438, "logps_train/policy_1_w": -208.34457397460938, "logps_train/policy_2_2": -170.52655029296875, "logps_train/policy_2_w": -266.58465576171875, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -246.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 2.1437623500823975, "rewards_train/1-l": -2.5186610221862793, "rewards_train/1-w": 3.774916887283325, "rewards_train/2-2": 3.8106276988983154, "rewards_train/2-w": 1.5860651731491089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.2935779094696045, "rewards_train/margins_1": 1.6311545372009277, "rewards_train/margins_2": 2.2245625257492065, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -230.5891876220703, "logps_train/policy_1_l": -139.17520141601562, "logps_train/policy_1_w": -111.7022705078125, "logps_train/policy_2_2": -167.89857482910156, "logps_train/policy_2_w": -172.70431518554688, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5129563808441162, "rewards_train/1-l": -1.1866607666015625, "rewards_train/1-w": 4.098522663116455, "rewards_train/2-2": 4.532017707824707, "rewards_train/2-w": 1.895194172859192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.285183429718018, "rewards_train/margins_1": 2.585566282272339, "rewards_train/margins_2": 2.636823534965515, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -142.90965270996094, "logps_train/policy_1_l": -148.48049926757812, "logps_train/policy_1_w": -96.91522216796875, "logps_train/policy_2_2": -109.72279357910156, "logps_train/policy_2_w": -132.007080078125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.1059092283248901, "rewards_train/1-l": -2.4093775749206543, "rewards_train/1-w": 2.592072010040283, "rewards_train/2-2": 2.490220546722412, "rewards_train/2-w": 1.3321044445037842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.0014495849609375, "rewards_train/margins_1": 1.486162781715393, "rewards_train/margins_2": 1.158116102218628, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -274.484619140625, "logps_train/policy_1_l": -288.58978271484375, "logps_train/policy_1_w": -173.89175415039062, "logps_train/policy_2_2": -209.19650268554688, "logps_train/policy_2_w": -227.11767578125, "logps_train/ref_1_2": -292.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.570285677909851, "rewards_train/1-l": -2.8152267932891846, "rewards_train/1-w": 3.16707444190979, "rewards_train/2-2": 4.092850208282471, "rewards_train/2-w": 1.350732684135437, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.982301235198975, "rewards_train/margins_1": 1.596788763999939, "rewards_train/margins_2": 2.7421175241470337, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -190.99111938476562, "logps_train/policy_1_l": -123.58190155029297, "logps_train/policy_1_w": -111.16020202636719, "logps_train/policy_2_2": -156.5855712890625, "logps_train/policy_2_w": -144.39041137695312, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.8571381568908691, "rewards_train/1-l": -2.0163931846618652, "rewards_train/1-w": 3.0824167728424072, "rewards_train/2-2": 3.35394287109375, "rewards_train/2-w": 1.7078335285186768, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0988099575042725, "rewards_train/margins_1": 1.225278615951538, "rewards_train/margins_2": 1.6461093425750732, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -176.93475341796875, "logps_train/policy_1_l": -139.85458374023438, "logps_train/policy_1_w": -146.84402465820312, "logps_train/policy_2_2": -138.7994384765625, "logps_train/policy_2_w": -195.95828247070312, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 0.9127751588821411, "rewards_train/1-l": -1.4128029346466064, "rewards_train/1-w": 3.528878688812256, "rewards_train/2-2": 2.9591188430786133, "rewards_train/2-w": 1.2877647876739502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.941681623458862, "rewards_train/margins_1": 2.6161035299301147, "rewards_train/margins_2": 1.671354055404663, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -119.68103790283203, "logps_train/policy_1_l": -129.7418212890625, "logps_train/policy_1_w": -102.7007827758789, "logps_train/policy_2_2": -89.44314575195312, "logps_train/policy_2_w": -139.98641967773438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 2.100646495819092, "rewards_train/1-l": -2.4308719635009766, "rewards_train/1-w": 3.6541407108306885, "rewards_train/2-2": 2.611936092376709, "rewards_train/2-w": 2.129483222961426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.085012674331665, "rewards_train/margins_1": 1.5534942150115967, "rewards_train/margins_2": 0.4824528694152832, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -244.47406005859375, "logps_train/policy_1_l": -209.0689239501953, "logps_train/policy_1_w": -186.1422119140625, "logps_train/policy_2_2": -174.90866088867188, "logps_train/policy_2_w": -231.11708068847656, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 1.8838446140289307, "rewards_train/1-l": -2.7025954723358154, "rewards_train/1-w": 3.4303102493286133, "rewards_train/2-2": 4.421632766723633, "rewards_train/2-w": 1.9461050033569336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.132905721664429, "rewards_train/margins_1": 1.5464656352996826, "rewards_train/margins_2": 2.475527763366699, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -217.8660888671875, "logps_train/policy_1_l": -196.1336669921875, "logps_train/policy_1_w": -149.4542694091797, "logps_train/policy_2_2": -167.7883758544922, "logps_train/policy_2_w": -204.55169677734375, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.7485476732254028, "rewards_train/1-l": -2.1473264694213867, "rewards_train/1-w": 4.003401279449463, "rewards_train/2-2": 2.983271360397339, "rewards_train/2-w": 2.0522522926330566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.15072774887085, "rewards_train/margins_1": 3.25485360622406, "rewards_train/margins_2": 0.9310190677642822, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -147.18460083007812, "logps_train/policy_1_l": -152.68869018554688, "logps_train/policy_1_w": -107.48304748535156, "logps_train/policy_2_2": -113.10356140136719, "logps_train/policy_2_w": -140.52642822265625, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.9971643686294556, "rewards_train/1-l": -1.5559666156768799, "rewards_train/1-w": 3.4435038566589355, "rewards_train/2-2": 3.4162063598632812, "rewards_train/2-w": 2.362983226776123, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.999470472335815, "rewards_train/margins_1": 1.44633948802948, "rewards_train/margins_2": 1.0532231330871582, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -262.674560546875, "logps_train/policy_1_l": -281.11846923828125, "logps_train/policy_1_w": -139.81321716308594, "logps_train/policy_2_2": -209.79896545410156, "logps_train/policy_2_w": -194.499755859375, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -254.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -248.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.6106679439544678, "rewards_train/1-l": -2.682156562805176, "rewards_train/1-w": 3.454615592956543, "rewards_train/2-2": 3.80135440826416, "rewards_train/2-w": 1.851587176322937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.136772155761719, "rewards_train/margins_1": 1.8439476490020752, "rewards_train/margins_2": 1.9497672319412231, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -188.39230346679688, "logps_train/policy_1_l": -165.70794677734375, "logps_train/policy_1_w": -107.08291625976562, "logps_train/policy_2_2": -154.14418029785156, "logps_train/policy_2_w": -125.27307891845703, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.579519271850586, "rewards_train/1-l": -2.202434539794922, "rewards_train/1-w": 3.2932705879211426, "rewards_train/2-2": 3.8949573040008545, "rewards_train/2-w": 2.319566488265991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4957051277160645, "rewards_train/margins_1": 0.7137513160705566, "rewards_train/margins_2": 1.5753908157348633, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -191.2635955810547, "logps_train/policy_1_l": -152.0274658203125, "logps_train/policy_1_w": -118.75068664550781, "logps_train/policy_2_2": -141.8045654296875, "logps_train/policy_2_w": -163.4728546142578, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.2419997453689575, "rewards_train/1-l": -2.0593864917755127, "rewards_train/1-w": 2.971806526184082, "rewards_train/2-2": 3.05001163482666, "rewards_train/2-w": 1.3620898723602295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.031193017959595, "rewards_train/margins_1": 1.7298067808151245, "rewards_train/margins_2": 1.6879217624664307, "step": 427 }, { "epoch": 1.28, "learning_rate": 1.58336395386638e-06, "loss": 0.3741, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -135.61158752441406, "logps_train/policy_1_l": -153.70770263671875, "logps_train/policy_1_w": -96.98294067382812, "logps_train/policy_2_2": -104.96340942382812, "logps_train/policy_2_w": -130.27098083496094, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 2.1568100452423096, "rewards_train/1-l": -2.0751149654388428, "rewards_train/1-w": 2.548190116882324, "rewards_train/2-2": 3.2552218437194824, "rewards_train/2-w": 1.6439956426620483, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.623305082321167, "rewards_train/margins_1": 0.39138007164001465, "rewards_train/margins_2": 1.611226201057434, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -172.47833251953125, "logps_train/policy_1_l": -195.5198974609375, "logps_train/policy_1_w": -165.63204956054688, "logps_train/policy_2_2": -147.8809814453125, "logps_train/policy_2_w": -204.24459838867188, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 2.220916271209717, "rewards_train/1-l": -2.0519909858703613, "rewards_train/1-w": 3.30710768699646, "rewards_train/2-2": 3.0689330101013184, "rewards_train/2-w": 1.884914755821228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.359098672866821, "rewards_train/margins_1": 1.0861914157867432, "rewards_train/margins_2": 1.1840182542800903, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -301.5925598144531, "logps_train/policy_1_l": -222.20066833496094, "logps_train/policy_1_w": -165.2354278564453, "logps_train/policy_2_2": -203.75698852539062, "logps_train/policy_2_w": -246.31674194335938, "logps_train/ref_1_2": -302.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -241.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 0.07512021064758301, "rewards_train/1-l": -1.9356917142868042, "rewards_train/1-w": 4.320207595825195, "rewards_train/2-2": 3.733675479888916, "rewards_train/2-w": 1.7495753765106201, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.2558993101119995, "rewards_train/margins_1": 4.245087385177612, "rewards_train/margins_2": 1.984100103378296, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -159.8797607421875, "logps_train/policy_1_l": -200.68435668945312, "logps_train/policy_1_w": -117.71952819824219, "logps_train/policy_2_2": -125.05511474609375, "logps_train/policy_2_w": -151.81964111328125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.6073373556137085, "rewards_train/1-l": -2.1126251220703125, "rewards_train/1-w": 3.138202428817749, "rewards_train/2-2": 2.8976125717163086, "rewards_train/2-w": 2.2852234840393066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.2508275508880615, "rewards_train/margins_1": 1.5308650732040405, "rewards_train/margins_2": 0.612389087677002, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -226.04542541503906, "logps_train/policy_1_l": -179.0225830078125, "logps_train/policy_1_w": -111.05239868164062, "logps_train/policy_2_2": -180.32684326171875, "logps_train/policy_2_w": -139.31689453125, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.5821763277053833, "rewards_train/1-l": -1.9371216297149658, "rewards_train/1-w": 2.163900852203369, "rewards_train/2-2": 3.6161434650421143, "rewards_train/2-w": 1.3269046545028687, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.101022481918335, "rewards_train/margins_1": 0.5817245244979858, "rewards_train/margins_2": 2.2892388105392456, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -201.5839385986328, "logps_train/policy_1_l": -214.50491333007812, "logps_train/policy_1_w": -131.405517578125, "logps_train/policy_2_2": -156.68585205078125, "logps_train/policy_2_w": -176.71156311035156, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.4166063070297241, "rewards_train/1-l": -2.4747111797332764, "rewards_train/1-w": 5.334447383880615, "rewards_train/2-2": 3.347820281982422, "rewards_train/2-w": 3.619469165802002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.809158563613892, "rewards_train/margins_1": 3.917841076850891, "rewards_train/margins_2": -0.2716488838195801, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -158.16433715820312, "logps_train/policy_1_l": -144.487548828125, "logps_train/policy_1_w": -97.40442657470703, "logps_train/policy_2_2": -124.59333038330078, "logps_train/policy_2_w": -142.78857421875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.7429410219192505, "rewards_train/1-l": -1.1159427165985107, "rewards_train/1-w": 3.3017449378967285, "rewards_train/2-2": 2.804729700088501, "rewards_train/2-w": 1.128954291343689, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.417687654495239, "rewards_train/margins_1": 1.558803915977478, "rewards_train/margins_2": 1.675775408744812, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -139.4102325439453, "logps_train/policy_1_l": -114.4961929321289, "logps_train/policy_1_w": -123.7301025390625, "logps_train/policy_2_2": -105.6897201538086, "logps_train/policy_2_w": -166.63307189941406, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.821476936340332, "rewards_train/1-l": -1.4984469413757324, "rewards_train/1-w": 3.2715206146240234, "rewards_train/2-2": 3.059934616088867, "rewards_train/2-w": 1.322239637374878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.769967555999756, "rewards_train/margins_1": 1.4500436782836914, "rewards_train/margins_2": 1.7376949787139893, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -76.61756896972656, "logps_train/policy_1_l": -50.92923355102539, "logps_train/policy_1_w": -39.2640380859375, "logps_train/policy_2_2": -52.50476837158203, "logps_train/policy_2_w": -61.64640426635742, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -40.75, "logps_train/ref_1_w": -62.25, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -78.0, "rewards_train/1-2": 0.9366801381111145, "rewards_train/1-l": -1.0251498222351074, "rewards_train/1-w": 2.2923460006713867, "rewards_train/2-2": 2.136241912841797, "rewards_train/2-w": 1.6517658233642578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.317495822906494, "rewards_train/margins_1": 1.3556658625602722, "rewards_train/margins_2": 0.48447608947753906, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -186.76622009277344, "logps_train/policy_1_l": -179.34088134765625, "logps_train/policy_1_w": -79.65794372558594, "logps_train/policy_2_2": -145.45626831054688, "logps_train/policy_2_w": -119.05987548828125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.4530658721923828, "rewards_train/1-l": -2.5481510162353516, "rewards_train/1-w": 3.2520763874053955, "rewards_train/2-2": 3.020388126373291, "rewards_train/2-w": 1.9260435104370117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.800227403640747, "rewards_train/margins_1": 1.7990105152130127, "rewards_train/margins_2": 1.0943446159362793, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -179.35760498046875, "logps_train/policy_1_l": -170.28549194335938, "logps_train/policy_1_w": -139.67306518554688, "logps_train/policy_2_2": -140.82296752929688, "logps_train/policy_2_w": -190.92474365234375, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.229863166809082, "rewards_train/1-l": -1.9607019424438477, "rewards_train/1-w": 4.274100303649902, "rewards_train/2-2": 3.773953437805176, "rewards_train/2-w": 2.13096284866333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.23480224609375, "rewards_train/margins_1": 2.0442371368408203, "rewards_train/margins_2": 1.6429905891418457, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -114.12300872802734, "logps_train/policy_1_l": -139.98876953125, "logps_train/policy_1_w": -137.7108917236328, "logps_train/policy_2_2": -94.54732513427734, "logps_train/policy_2_w": -166.17312622070312, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.7384799718856812, "rewards_train/1-l": -2.3812992572784424, "rewards_train/1-w": 2.4914097785949707, "rewards_train/2-2": 2.484330177307129, "rewards_train/2-w": 1.7475306987762451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.872709035873413, "rewards_train/margins_1": 0.7529298067092896, "rewards_train/margins_2": 0.7367994785308838, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -88.63389587402344, "logps_train/policy_1_l": -91.50340270996094, "logps_train/policy_1_w": -65.72666931152344, "logps_train/policy_2_2": -70.9803466796875, "logps_train/policy_2_w": -90.31687927246094, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 1.8162975311279297, "rewards_train/1-l": -1.2397936582565308, "rewards_train/1-w": 2.659364700317383, "rewards_train/2-2": 2.4572386741638184, "rewards_train/2-w": 1.608937382698059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8991583585739136, "rewards_train/margins_1": 0.8430671691894531, "rewards_train/margins_2": 0.8483012914657593, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -212.6400146484375, "logps_train/policy_1_l": -235.82626342773438, "logps_train/policy_1_w": -135.9874267578125, "logps_train/policy_2_2": -173.20278930664062, "logps_train/policy_2_w": -174.10694885253906, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.901623249053955, "rewards_train/1-l": -2.8923916816711426, "rewards_train/1-w": 3.2371954917907715, "rewards_train/2-2": 3.656282901763916, "rewards_train/2-w": 2.281491994857788, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.129587173461914, "rewards_train/margins_1": 1.3355722427368164, "rewards_train/margins_2": 1.374790906906128, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -115.62173461914062, "logps_train/policy_1_l": -127.4947280883789, "logps_train/policy_1_w": -106.32913208007812, "logps_train/policy_2_2": -79.40267181396484, "logps_train/policy_2_w": -164.3265838623047, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 2.606577157974243, "rewards_train/1-l": -2.114121675491333, "rewards_train/1-w": 3.4577114582061768, "rewards_train/2-2": 3.775357723236084, "rewards_train/2-w": 1.1923410892486572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.57183313369751, "rewards_train/margins_1": 0.8511343002319336, "rewards_train/margins_2": 2.5830166339874268, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -136.35182189941406, "logps_train/policy_1_l": -169.41702270507812, "logps_train/policy_1_w": -102.6935806274414, "logps_train/policy_2_2": -102.37696075439453, "logps_train/policy_2_w": -142.8887939453125, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3929437398910522, "rewards_train/1-l": -3.093069076538086, "rewards_train/1-w": 2.987673044204712, "rewards_train/2-2": 2.801365852355957, "rewards_train/2-w": 1.6704957485198975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.080742120742798, "rewards_train/margins_1": 1.5947293043136597, "rewards_train/margins_2": 1.1308701038360596, "step": 429 }, { "epoch": 1.29, "learning_rate": 1.5604310447144052e-06, "loss": 0.4742, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -177.90481567382812, "logps_train/policy_1_l": -143.0838165283203, "logps_train/policy_1_w": -113.10897064208984, "logps_train/policy_2_2": -122.19862365722656, "logps_train/policy_2_w": -159.5621795654297, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.9774876832962036, "rewards_train/1-l": -1.58181893825531, "rewards_train/1-w": 3.3828530311584473, "rewards_train/2-2": 2.920762538909912, "rewards_train/2-w": 1.9469070434570312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.964671969413757, "rewards_train/margins_1": 2.4053653478622437, "rewards_train/margins_2": 0.9738554954528809, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -164.42636108398438, "logps_train/policy_1_l": -123.06906127929688, "logps_train/policy_1_w": -112.69700622558594, "logps_train/policy_2_2": -130.00173950195312, "logps_train/policy_2_w": -155.2197265625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.004238486289978, "rewards_train/1-l": -1.4521207809448242, "rewards_train/1-w": 2.7124288082122803, "rewards_train/2-2": 2.4201390743255615, "rewards_train/2-w": 0.8863051533699036, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1645495891571045, "rewards_train/margins_1": 1.7081903219223022, "rewards_train/margins_2": 1.533833920955658, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -70.76091003417969, "logps_train/policy_1_l": -111.17170715332031, "logps_train/policy_1_w": -94.47142028808594, "logps_train/policy_2_2": -52.08614730834961, "logps_train/policy_2_w": -121.6957778930664, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -90.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.0532060861587524, "rewards_train/1-l": -2.065608263015747, "rewards_train/1-w": 2.630983829498291, "rewards_train/2-2": 1.7991979122161865, "rewards_train/2-w": 1.361671805381775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.696592092514038, "rewards_train/margins_1": 1.5777777433395386, "rewards_train/margins_2": 0.4375261068344116, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -224.9530029296875, "logps_train/policy_1_l": -226.46661376953125, "logps_train/policy_1_w": -154.08013916015625, "logps_train/policy_2_2": -178.52182006835938, "logps_train/policy_2_w": -207.50125122070312, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.5039197206497192, "rewards_train/1-l": -2.427325487136841, "rewards_train/1-w": 3.5841732025146484, "rewards_train/2-2": 3.2403969764709473, "rewards_train/2-w": 1.4076876640319824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.011498689651489, "rewards_train/margins_1": 2.080253481864929, "rewards_train/margins_2": 1.8327093124389648, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -147.0398712158203, "logps_train/policy_1_l": -140.19351196289062, "logps_train/policy_1_w": -144.0349884033203, "logps_train/policy_2_2": -111.49798583984375, "logps_train/policy_2_w": -175.3689727783203, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.7350753545761108, "rewards_train/1-l": -1.5025534629821777, "rewards_train/1-w": 2.6558761596679688, "rewards_train/2-2": 3.080279588699341, "rewards_train/2-w": 1.3142752647399902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.1584296226501465, "rewards_train/margins_1": 0.9208008050918579, "rewards_train/margins_2": 1.7660043239593506, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -129.45361328125, "logps_train/policy_1_l": -131.2156982421875, "logps_train/policy_1_w": -130.70590209960938, "logps_train/policy_2_2": -100.93618774414062, "logps_train/policy_2_w": -178.59326171875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6530768871307373, "rewards_train/1-l": -1.5352427959442139, "rewards_train/1-w": 3.816911220550537, "rewards_train/2-2": 2.661068916320801, "rewards_train/2-w": 1.8750498294830322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.352154016494751, "rewards_train/margins_1": 2.1638343334198, "rewards_train/margins_2": 0.7860190868377686, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -135.82154846191406, "logps_train/policy_1_l": -254.28440856933594, "logps_train/policy_1_w": -104.8362808227539, "logps_train/policy_2_2": -93.56709289550781, "logps_train/policy_2_w": -140.21876525878906, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.652220368385315, "rewards_train/1-l": -4.267114162445068, "rewards_train/1-w": 2.7355124950408936, "rewards_train/2-2": 3.0182905197143555, "rewards_train/2-w": 1.5199207067489624, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.002626657485962, "rewards_train/margins_1": 1.0832921266555786, "rewards_train/margins_2": 1.498369812965393, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -196.59393310546875, "logps_train/policy_1_l": -217.52084350585938, "logps_train/policy_1_w": -145.39976501464844, "logps_train/policy_2_2": -147.6259765625, "logps_train/policy_2_w": -206.56021118164062, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 2.2406063079833984, "rewards_train/1-l": -2.714585304260254, "rewards_train/1-w": 3.77408504486084, "rewards_train/2-2": 4.162403583526611, "rewards_train/2-w": 1.7517929077148438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.488670349121094, "rewards_train/margins_1": 1.5334787368774414, "rewards_train/margins_2": 2.4106106758117676, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -150.4290008544922, "logps_train/policy_1_l": -129.18600463867188, "logps_train/policy_1_w": -70.71490478515625, "logps_train/policy_2_2": -120.43898010253906, "logps_train/policy_2_w": -89.48973846435547, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.194599986076355, "rewards_train/1-l": -1.8732876777648926, "rewards_train/1-w": 2.983978271484375, "rewards_train/2-2": 2.7592267990112305, "rewards_train/2-w": 2.4604015350341797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.857265949249268, "rewards_train/margins_1": 1.78937828540802, "rewards_train/margins_2": 0.2988252639770508, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -141.6142578125, "logps_train/policy_1_l": -121.06859588623047, "logps_train/policy_1_w": -102.2469711303711, "logps_train/policy_2_2": -115.33474731445312, "logps_train/policy_2_w": -136.07403564453125, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.5596674680709839, "rewards_train/1-l": -1.1623287200927734, "rewards_train/1-w": 3.1737406253814697, "rewards_train/2-2": 2.4782443046569824, "rewards_train/2-w": 1.9613474607467651, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.336069345474243, "rewards_train/margins_1": 1.6140731573104858, "rewards_train/margins_2": 0.5168968439102173, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -257.5924072265625, "logps_train/policy_1_l": -260.01995849609375, "logps_train/policy_1_w": -157.6549835205078, "logps_train/policy_2_2": -204.30908203125, "logps_train/policy_2_w": -204.29991149902344, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.828259825706482, "rewards_train/1-l": -3.1891050338745117, "rewards_train/1-w": 3.839188814163208, "rewards_train/2-2": 4.108155250549316, "rewards_train/2-w": 1.9418842792510986, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.02829384803772, "rewards_train/margins_1": 2.010928988456726, "rewards_train/margins_2": 2.1662709712982178, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -177.6070556640625, "logps_train/policy_1_l": -249.8731231689453, "logps_train/policy_1_w": -134.19093322753906, "logps_train/policy_2_2": -137.09466552734375, "logps_train/policy_2_w": -195.36041259765625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.986170768737793, "rewards_train/1-l": -3.2091870307922363, "rewards_train/1-w": 3.721531867980957, "rewards_train/2-2": 3.585846185684204, "rewards_train/2-w": 1.1045840978622437, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.930718898773193, "rewards_train/margins_1": 1.735361099243164, "rewards_train/margins_2": 2.4812620878219604, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -92.65584564208984, "logps_train/policy_1_l": -89.11373901367188, "logps_train/policy_1_w": -41.758766174316406, "logps_train/policy_2_2": -68.76553344726562, "logps_train/policy_2_w": -61.33250427246094, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -57.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -68.0, "rewards_train/1-2": 0.5174238085746765, "rewards_train/1-l": -1.0803189277648926, "rewards_train/1-w": 1.5708028078079224, "rewards_train/2-2": 1.6733490228652954, "rewards_train/2-w": 0.6923354268074036, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.651121735572815, "rewards_train/margins_1": 1.0533789992332458, "rewards_train/margins_2": 0.9810135960578918, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -165.026123046875, "logps_train/policy_1_l": -147.8663787841797, "logps_train/policy_1_w": -153.64456176757812, "logps_train/policy_2_2": -128.18052673339844, "logps_train/policy_2_w": -198.54176330566406, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.945824384689331, "rewards_train/1-l": -1.4616386890411377, "rewards_train/1-w": 3.87304425239563, "rewards_train/2-2": 3.4366350173950195, "rewards_train/2-w": 2.033324956893921, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.334682941436768, "rewards_train/margins_1": 1.9272198677062988, "rewards_train/margins_2": 1.4033100605010986, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -142.12905883789062, "logps_train/policy_1_l": -159.7833709716797, "logps_train/policy_1_w": -106.86593627929688, "logps_train/policy_2_2": -119.91853332519531, "logps_train/policy_2_w": -135.25982666015625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.770296335220337, "rewards_train/1-l": -1.9181804656982422, "rewards_train/1-w": 3.1321568489074707, "rewards_train/2-2": 2.713224411010742, "rewards_train/2-w": 2.014641761779785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.050337314605713, "rewards_train/margins_1": 1.3618605136871338, "rewards_train/margins_2": 0.698582649230957, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -172.00425720214844, "logps_train/policy_1_l": -105.85208129882812, "logps_train/policy_1_w": -95.5660400390625, "logps_train/policy_2_2": -133.32601928710938, "logps_train/policy_2_w": -134.22216796875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 2.1245739459991455, "rewards_train/1-l": -1.149954080581665, "rewards_train/1-w": 2.919177293777466, "rewards_train/2-2": 3.3111472129821777, "rewards_train/2-w": 1.6848137378692627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.069131374359131, "rewards_train/margins_1": 0.7946033477783203, "rewards_train/margins_2": 1.626333475112915, "step": 431 }, { "epoch": 1.29, "learning_rate": 1.5375898358361079e-06, "loss": 0.3849, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -103.6656494140625, "logps_train/policy_1_l": -103.7939224243164, "logps_train/policy_1_w": -48.600372314453125, "logps_train/policy_2_2": -81.05778503417969, "logps_train/policy_2_w": -70.50166320800781, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.022497534751892, "rewards_train/1-l": -1.6692357063293457, "rewards_train/1-w": 2.300900459289551, "rewards_train/2-2": 1.8658037185668945, "rewards_train/2-w": 1.407646656036377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9701361656188965, "rewards_train/margins_1": 1.2784029245376587, "rewards_train/margins_2": 0.4581570625305176, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -196.2396240234375, "logps_train/policy_1_l": -228.09030151367188, "logps_train/policy_1_w": -117.53839874267578, "logps_train/policy_2_2": -156.14956665039062, "logps_train/policy_2_w": -150.5883026123047, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.807288408279419, "rewards_train/1-l": -3.1160616874694824, "rewards_train/1-w": 3.3555350303649902, "rewards_train/2-2": 3.5819177627563477, "rewards_train/2-w": 2.2661690711975098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.471596717834473, "rewards_train/margins_1": 1.5482466220855713, "rewards_train/margins_2": 1.315748691558838, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -166.72531127929688, "logps_train/policy_1_l": -168.80279541015625, "logps_train/policy_1_w": -133.642578125, "logps_train/policy_2_2": -120.3983154296875, "logps_train/policy_2_w": -168.95492553710938, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.883718729019165, "rewards_train/1-l": -1.2451231479644775, "rewards_train/1-w": 3.3630871772766113, "rewards_train/2-2": 3.4773569107055664, "rewards_train/2-w": 1.8091942071914673, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.608210325241089, "rewards_train/margins_1": 1.4793684482574463, "rewards_train/margins_2": 1.6681627035140991, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -138.4620819091797, "logps_train/policy_1_l": -159.51095581054688, "logps_train/policy_1_w": -108.58766174316406, "logps_train/policy_2_2": -121.42437744140625, "logps_train/policy_2_w": -137.31192016601562, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.7602369785308838, "rewards_train/1-l": -2.028341770172119, "rewards_train/1-w": 3.3881092071533203, "rewards_train/2-2": 2.2593202590942383, "rewards_train/2-w": 2.284433126449585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.4164509773254395, "rewards_train/margins_1": 1.6278722286224365, "rewards_train/margins_2": -0.02511286735534668, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -87.57968139648438, "logps_train/policy_1_l": -54.62860870361328, "logps_train/policy_1_w": -111.6435546875, "logps_train/policy_2_2": -65.52320861816406, "logps_train/policy_2_w": -149.30154418945312, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -48.25, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -86.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.2264070510864258, "rewards_train/1-l": -0.6376657485961914, "rewards_train/1-w": 2.625194787979126, "rewards_train/2-2": 2.096898078918457, "rewards_train/2-w": 0.7546099424362183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2628605365753174, "rewards_train/margins_1": 1.3987877368927002, "rewards_train/margins_2": 1.3422881364822388, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -183.835693359375, "logps_train/policy_1_l": -174.40927124023438, "logps_train/policy_1_w": -149.84872436523438, "logps_train/policy_2_2": -151.08099365234375, "logps_train/policy_2_w": -184.60841369628906, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.2570548057556152, "rewards_train/1-l": -2.495224952697754, "rewards_train/1-w": 3.5755772590637207, "rewards_train/2-2": 3.408306837081909, "rewards_train/2-w": 2.1719717979431152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.070802211761475, "rewards_train/margins_1": 1.3185224533081055, "rewards_train/margins_2": 1.236335039138794, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -144.9283905029297, "logps_train/policy_1_l": -100.74713134765625, "logps_train/policy_1_w": -64.07598876953125, "logps_train/policy_2_2": -113.66789245605469, "logps_train/policy_2_w": -84.38414001464844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.4915359020233154, "rewards_train/1-l": -2.18882417678833, "rewards_train/1-w": 2.1922543048858643, "rewards_train/2-2": 2.952741861343384, "rewards_train/2-w": 1.2726948261260986, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.381078481674194, "rewards_train/margins_1": 0.7007184028625488, "rewards_train/margins_2": 1.6800470352172852, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -103.29220581054688, "logps_train/policy_1_l": -80.19400787353516, "logps_train/policy_1_w": -89.43952178955078, "logps_train/policy_2_2": -82.388916015625, "logps_train/policy_2_w": -111.88724517822266, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.6184353828430176, "rewards_train/1-l": -1.1703773736953735, "rewards_train/1-w": 2.299016237258911, "rewards_train/2-2": 2.2220458984375, "rewards_train/2-w": 1.401118516921997, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4693936109542847, "rewards_train/margins_1": 0.6805808544158936, "rewards_train/margins_2": 0.8209273815155029, "step": 432 }, { "epoch": 1.3, "logps_train/policy_1_2": -139.0869140625, "logps_train/policy_1_l": -187.74961853027344, "logps_train/policy_1_w": -110.60720825195312, "logps_train/policy_2_2": -105.77130126953125, "logps_train/policy_2_w": -156.10159301757812, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.5850584506988525, "rewards_train/1-l": -3.3647561073303223, "rewards_train/1-w": 3.3330297470092773, "rewards_train/2-2": 2.972869873046875, "rewards_train/2-w": 1.5460898876190186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.6977858543396, "rewards_train/margins_1": 1.7479712963104248, "rewards_train/margins_2": 1.4267799854278564, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -164.95021057128906, "logps_train/policy_1_l": -181.11952209472656, "logps_train/policy_1_w": -126.07138061523438, "logps_train/policy_2_2": -142.3502197265625, "logps_train/policy_2_w": -169.3944854736328, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 2.008885145187378, "rewards_train/1-l": -1.5221086740493774, "rewards_train/1-w": 2.9373927116394043, "rewards_train/2-2": 2.738025426864624, "rewards_train/2-w": 1.1519569158554077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.459501385688782, "rewards_train/margins_1": 0.9285075664520264, "rewards_train/margins_2": 1.5860685110092163, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -173.37319946289062, "logps_train/policy_1_l": -208.79835510253906, "logps_train/policy_1_w": -122.89501953125, "logps_train/policy_2_2": -146.3580780029297, "logps_train/policy_2_w": -154.39988708496094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.4439287185668945, "rewards_train/1-l": -2.6509296894073486, "rewards_train/1-w": 2.752685308456421, "rewards_train/2-2": 3.454035997390747, "rewards_train/2-w": 1.817823886871338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.4036149978637695, "rewards_train/margins_1": 0.30875658988952637, "rewards_train/margins_2": 1.6362121105194092, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -187.1363525390625, "logps_train/policy_1_l": -190.02374267578125, "logps_train/policy_1_w": -159.25892639160156, "logps_train/policy_2_2": -153.41876220703125, "logps_train/policy_2_w": -192.2643585205078, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.9387078285217285, "rewards_train/1-l": -2.63206148147583, "rewards_train/1-w": 2.9225447177886963, "rewards_train/2-2": 3.1913273334503174, "rewards_train/2-w": 1.8095011711120605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.554606199264526, "rewards_train/margins_1": 0.9838368892669678, "rewards_train/margins_2": 1.3818261623382568, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -60.0406494140625, "logps_train/policy_1_l": -70.64997863769531, "logps_train/policy_1_w": -56.709476470947266, "logps_train/policy_2_2": -43.94374465942383, "logps_train/policy_2_w": -70.78241729736328, "logps_train/ref_1_2": -69.0, "logps_train/ref_1_l": -60.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -60.25, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.918005108833313, "rewards_train/1-l": -1.0684161186218262, "rewards_train/1-w": 1.5188963413238525, "rewards_train/2-2": 1.6283793449401855, "rewards_train/2-w": 0.7944144010543823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.5873124599456787, "rewards_train/margins_1": 0.6008912324905396, "rewards_train/margins_2": 0.8339649438858032, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -220.2709503173828, "logps_train/policy_1_l": -208.36453247070312, "logps_train/policy_1_w": -171.87332153320312, "logps_train/policy_2_2": -186.27037048339844, "logps_train/policy_2_w": -214.9272918701172, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.0697808265686035, "rewards_train/1-l": -2.8555943965911865, "rewards_train/1-w": 3.068526268005371, "rewards_train/2-2": 3.4784317016601562, "rewards_train/2-w": 1.335396409034729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.924120664596558, "rewards_train/margins_1": 0.9987454414367676, "rewards_train/margins_2": 2.1430352926254272, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -167.43484497070312, "logps_train/policy_1_l": -192.05419921875, "logps_train/policy_1_w": -163.31134033203125, "logps_train/policy_2_2": -130.56385803222656, "logps_train/policy_2_w": -211.83297729492188, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.6221394538879395, "rewards_train/1-l": -2.042919158935547, "rewards_train/1-w": 3.56886625289917, "rewards_train/2-2": 3.2279882431030273, "rewards_train/2-w": 1.7417027950286865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.611785411834717, "rewards_train/margins_1": 1.9467267990112305, "rewards_train/margins_2": 1.4862854480743408, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -103.81299591064453, "logps_train/policy_1_l": -98.41658020019531, "logps_train/policy_1_w": -58.950016021728516, "logps_train/policy_2_2": -83.99883270263672, "logps_train/policy_2_w": -79.06065368652344, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -86.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.9401848316192627, "rewards_train/1-l": -1.1247633695602417, "rewards_train/1-w": 2.744842052459717, "rewards_train/2-2": 2.523261070251465, "rewards_train/2-w": 1.9710830450057983, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8696054220199585, "rewards_train/margins_1": 0.8046572208404541, "rewards_train/margins_2": 0.5521780252456665, "step": 433 }, { "epoch": 1.3, "learning_rate": 1.5148425564932085e-06, "loss": 0.4022, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -167.2812042236328, "logps_train/policy_1_l": -142.02633666992188, "logps_train/policy_1_w": -145.24639892578125, "logps_train/policy_2_2": -141.89154052734375, "logps_train/policy_2_w": -177.6109619140625, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.155473470687866, "rewards_train/1-l": -1.3820781707763672, "rewards_train/1-w": 3.3716495037078857, "rewards_train/2-2": 3.038970470428467, "rewards_train/2-w": 2.0871469974517822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.753727674484253, "rewards_train/margins_1": 1.2161760330200195, "rewards_train/margins_2": 0.9518234729766846, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -215.590576171875, "logps_train/policy_1_l": -213.74267578125, "logps_train/policy_1_w": -213.38818359375, "logps_train/policy_2_2": -179.14511108398438, "logps_train/policy_2_w": -263.5429992675781, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 2.1188230514526367, "rewards_train/1-l": -2.003955602645874, "rewards_train/1-w": 4.320361614227295, "rewards_train/2-2": 3.6885151863098145, "rewards_train/2-w": 2.5205037593841553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.324317216873169, "rewards_train/margins_1": 2.201538562774658, "rewards_train/margins_2": 1.1680114269256592, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -218.24920654296875, "logps_train/policy_1_l": -237.748046875, "logps_train/policy_1_w": -173.24502563476562, "logps_train/policy_2_2": -179.07940673828125, "logps_train/policy_2_w": -204.63720703125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.8125803470611572, "rewards_train/1-l": -1.999805212020874, "rewards_train/1-w": 3.987997055053711, "rewards_train/2-2": 3.917060136795044, "rewards_train/2-w": 2.7987794876098633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.987802267074585, "rewards_train/margins_1": 1.1754167079925537, "rewards_train/margins_2": 1.1182806491851807, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -159.64358520507812, "logps_train/policy_1_l": -234.48550415039062, "logps_train/policy_1_w": -137.546142578125, "logps_train/policy_2_2": -131.33740234375, "logps_train/policy_2_w": -181.58206176757812, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.4075167179107666, "rewards_train/1-l": -2.982534170150757, "rewards_train/1-w": 3.9305412769317627, "rewards_train/2-2": 3.166259765625, "rewards_train/2-w": 2.1839821338653564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.9130754470825195, "rewards_train/margins_1": 1.523024559020996, "rewards_train/margins_2": 0.9822776317596436, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -206.78823852539062, "logps_train/policy_1_l": -250.2135467529297, "logps_train/policy_1_w": -204.26242065429688, "logps_train/policy_2_2": -175.24209594726562, "logps_train/policy_2_w": -259.8747863769531, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.6368017196655273, "rewards_train/1-l": -2.9385430812835693, "rewards_train/1-w": 4.697195529937744, "rewards_train/2-2": 3.563290596008301, "rewards_train/2-w": 2.531273365020752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.6357386112213135, "rewards_train/margins_1": 2.060393810272217, "rewards_train/margins_2": 1.0320172309875488, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -195.45706176757812, "logps_train/policy_1_l": -171.856689453125, "logps_train/policy_1_w": -133.7968292236328, "logps_train/policy_2_2": -143.9714813232422, "logps_train/policy_2_w": -184.03001403808594, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6574187278747559, "rewards_train/1-l": -1.7653565406799316, "rewards_train/1-w": 3.1687543392181396, "rewards_train/2-2": 3.596602201461792, "rewards_train/2-w": 1.3938722610473633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.934110879898071, "rewards_train/margins_1": 1.5113356113433838, "rewards_train/margins_2": 2.2027299404144287, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -160.30059814453125, "logps_train/policy_1_l": -202.48147583007812, "logps_train/policy_1_w": -132.19155883789062, "logps_train/policy_2_2": -127.26367950439453, "logps_train/policy_2_w": -168.81301879882812, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.6754095554351807, "rewards_train/1-l": -3.0346713066101074, "rewards_train/1-w": 2.8117027282714844, "rewards_train/2-2": 3.40664005279541, "rewards_train/2-w": 1.4171361923217773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.846374034881592, "rewards_train/margins_1": 1.1362931728363037, "rewards_train/margins_2": 1.9895038604736328, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -218.2174072265625, "logps_train/policy_1_l": -179.10498046875, "logps_train/policy_1_w": -82.98023986816406, "logps_train/policy_2_2": -162.35629272460938, "logps_train/policy_2_w": -120.68050384521484, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.3071653842926025, "rewards_train/1-l": -2.129249095916748, "rewards_train/1-w": 2.875804901123047, "rewards_train/2-2": 3.7436683177948, "rewards_train/2-w": 1.729215145111084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.005053997039795, "rewards_train/margins_1": 1.5686395168304443, "rewards_train/margins_2": 2.014453172683716, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -79.57534790039062, "logps_train/policy_1_l": -100.58229064941406, "logps_train/policy_1_w": -61.50923538208008, "logps_train/policy_2_2": -57.62773895263672, "logps_train/policy_2_w": -85.48776245117188, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.9401211142539978, "rewards_train/1-l": -2.312525987625122, "rewards_train/1-w": 1.8299360275268555, "rewards_train/2-2": 1.7610543966293335, "rewards_train/2-w": 0.7707542181015015, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1424620151519775, "rewards_train/margins_1": 0.8898149132728577, "rewards_train/margins_2": 0.990300178527832, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -135.059814453125, "logps_train/policy_1_l": -176.99240112304688, "logps_train/policy_1_w": -107.02222442626953, "logps_train/policy_2_2": -94.97307586669922, "logps_train/policy_2_w": -143.89288330078125, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.3932373523712158, "rewards_train/1-l": -1.9957232475280762, "rewards_train/1-w": 2.5680902004241943, "rewards_train/2-2": 2.713630199432373, "rewards_train/2-w": 1.4325858354568481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5638134479522705, "rewards_train/margins_1": 1.1748528480529785, "rewards_train/margins_2": 1.281044363975525, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -189.49749755859375, "logps_train/policy_1_l": -157.3096923828125, "logps_train/policy_1_w": -119.46592712402344, "logps_train/policy_2_2": -151.53662109375, "logps_train/policy_2_w": -150.51828002929688, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 2.0752511024475098, "rewards_train/1-l": -1.4395618438720703, "rewards_train/1-w": 3.562781810760498, "rewards_train/2-2": 3.602588176727295, "rewards_train/2-w": 2.641922950744629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.002343654632568, "rewards_train/margins_1": 1.4875307083129883, "rewards_train/margins_2": 0.960665225982666, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -150.42495727539062, "logps_train/policy_1_l": -171.7232666015625, "logps_train/policy_1_w": -127.66053009033203, "logps_train/policy_2_2": -122.1509780883789, "logps_train/policy_2_w": -161.04244995117188, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 2.4200053215026855, "rewards_train/1-l": -1.2826793193817139, "rewards_train/1-w": 3.0995726585388184, "rewards_train/2-2": 3.2599024772644043, "rewards_train/2-w": 2.1699752807617188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.382251977920532, "rewards_train/margins_1": 0.6795673370361328, "rewards_train/margins_2": 1.0899271965026855, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -220.19308471679688, "logps_train/policy_1_l": -234.893310546875, "logps_train/policy_1_w": -175.415771484375, "logps_train/policy_2_2": -182.08993530273438, "logps_train/policy_2_w": -207.6033935546875, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.7619423866271973, "rewards_train/1-l": -2.361205577850342, "rewards_train/1-w": 4.845922946929932, "rewards_train/2-2": 4.522256374359131, "rewards_train/2-w": 3.4896607398986816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.207128524780273, "rewards_train/margins_1": 2.0839805603027344, "rewards_train/margins_2": 1.0325956344604492, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -209.50418090820312, "logps_train/policy_1_l": -206.38885498046875, "logps_train/policy_1_w": -179.7564697265625, "logps_train/policy_2_2": -167.3487548828125, "logps_train/policy_2_w": -227.95384216308594, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.8277065753936768, "rewards_train/1-l": -2.638885974884033, "rewards_train/1-w": 4.427478790283203, "rewards_train/2-2": 3.5346570014953613, "rewards_train/2-w": 2.7968039512634277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.066364765167236, "rewards_train/margins_1": 2.5997722148895264, "rewards_train/margins_2": 0.7378530502319336, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -90.56668090820312, "logps_train/policy_1_l": -60.51705551147461, "logps_train/policy_1_w": -88.90184783935547, "logps_train/policy_2_2": -61.176788330078125, "logps_train/policy_2_w": -142.736328125, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -51.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.209933876991272, "rewards_train/1-l": -0.9299282431602478, "rewards_train/1-w": 2.579151153564453, "rewards_train/2-2": 2.50439190864563, "rewards_train/2-w": 0.2925788462162018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.509079396724701, "rewards_train/margins_1": 1.3692172765731812, "rewards_train/margins_2": 2.211813062429428, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -213.11009216308594, "logps_train/policy_1_l": -176.48724365234375, "logps_train/policy_1_w": -138.60223388671875, "logps_train/policy_2_2": -175.7188262939453, "logps_train/policy_2_w": -165.6382598876953, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.8061786890029907, "rewards_train/1-l": -1.6081008911132812, "rewards_train/1-w": 3.710089921951294, "rewards_train/2-2": 2.978116989135742, "rewards_train/2-w": 2.6580488681793213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.318190813064575, "rewards_train/margins_1": 1.9039112329483032, "rewards_train/margins_2": 0.3200681209564209, "step": 435 }, { "epoch": 1.31, "learning_rate": 1.49219142678007e-06, "loss": 0.3521, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -122.6756591796875, "logps_train/policy_1_l": -135.9609832763672, "logps_train/policy_1_w": -88.18475341796875, "logps_train/policy_2_2": -94.85540771484375, "logps_train/policy_2_w": -121.29513549804688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.5660274028778076, "rewards_train/1-l": -2.157036304473877, "rewards_train/1-w": 2.840118408203125, "rewards_train/2-2": 2.411529302597046, "rewards_train/2-w": 1.5072048902511597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.997154712677002, "rewards_train/margins_1": 1.2740910053253174, "rewards_train/margins_2": 0.9043244123458862, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -236.7926025390625, "logps_train/policy_1_l": -188.3126220703125, "logps_train/policy_1_w": -118.57760620117188, "logps_train/policy_2_2": -177.48800659179688, "logps_train/policy_2_w": -170.58154296875, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.5426157712936401, "rewards_train/1-l": -1.605870246887207, "rewards_train/1-w": 4.067239761352539, "rewards_train/2-2": 4.0121355056762695, "rewards_train/2-w": 1.9480953216552734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.673110008239746, "rewards_train/margins_1": 2.524623990058899, "rewards_train/margins_2": 2.064040184020996, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -180.4012451171875, "logps_train/policy_1_l": -179.8634033203125, "logps_train/policy_1_w": -135.83572387695312, "logps_train/policy_2_2": -134.49557495117188, "logps_train/policy_2_w": -188.63023376464844, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 2.1505002975463867, "rewards_train/1-l": -2.108214855194092, "rewards_train/1-w": 3.4976768493652344, "rewards_train/2-2": 3.286379337310791, "rewards_train/2-w": 2.0682270526885986, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.605891704559326, "rewards_train/margins_1": 1.3471765518188477, "rewards_train/margins_2": 1.2181522846221924, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -117.78898620605469, "logps_train/policy_1_l": -141.9599151611328, "logps_train/policy_1_w": -80.54048919677734, "logps_train/policy_2_2": -94.59898376464844, "logps_train/policy_2_w": -100.39518737792969, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.5148518085479736, "rewards_train/1-l": -2.5819292068481445, "rewards_train/1-w": 3.261575937271118, "rewards_train/2-2": 2.3104138374328613, "rewards_train/2-w": 2.571418285369873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.843505144119263, "rewards_train/margins_1": 1.7467241287231445, "rewards_train/margins_2": -0.2610044479370117, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -63.233062744140625, "logps_train/policy_1_l": -71.07161712646484, "logps_train/policy_1_w": -45.760704040527344, "logps_train/policy_2_2": -45.62731170654297, "logps_train/policy_2_w": -67.77153015136719, "logps_train/ref_1_2": -76.5, "logps_train/ref_1_l": -60.5, "logps_train/ref_1_w": -64.5, "logps_train/ref_2_2": -62.75, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": 1.3497402667999268, "rewards_train/1-l": -1.0545251369476318, "rewards_train/1-w": 1.9020544290542603, "rewards_train/2-2": 1.7251595258712769, "rewards_train/2-w": 1.2830027341842651, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.956579566001892, "rewards_train/margins_1": 0.5523141622543335, "rewards_train/margins_2": 0.4421567916870117, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -186.61270141601562, "logps_train/policy_1_l": -147.06918334960938, "logps_train/policy_1_w": -143.81036376953125, "logps_train/policy_2_2": -151.62872314453125, "logps_train/policy_2_w": -188.72451782226562, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.2504477500915527, "rewards_train/1-l": -2.5858254432678223, "rewards_train/1-w": 4.08302640914917, "rewards_train/2-2": 3.6281423568725586, "rewards_train/2-w": 2.2142672538757324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.668851852416992, "rewards_train/margins_1": 1.8325786590576172, "rewards_train/margins_2": 1.4138751029968262, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -230.92578125, "logps_train/policy_1_l": -231.30404663085938, "logps_train/policy_1_w": -164.08726501464844, "logps_train/policy_2_2": -170.08274841308594, "logps_train/policy_2_w": -207.22926330566406, "logps_train/ref_1_2": -251.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.037109613418579, "rewards_train/1-l": -2.350423574447632, "rewards_train/1-w": 3.000648260116577, "rewards_train/2-2": 3.6862571239471436, "rewards_train/2-w": 1.4712144136428833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.351071834564209, "rewards_train/margins_1": 0.963538646697998, "rewards_train/margins_2": 2.2150427103042603, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -150.6949005126953, "logps_train/policy_1_l": -135.9197540283203, "logps_train/policy_1_w": -113.68290710449219, "logps_train/policy_2_2": -115.58114624023438, "logps_train/policy_2_w": -152.10659790039062, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.420353889465332, "rewards_train/1-l": -1.6048669815063477, "rewards_train/1-w": 3.3879592418670654, "rewards_train/2-2": 2.851259708404541, "rewards_train/2-w": 1.5737154483795166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.992826223373413, "rewards_train/margins_1": 1.9676053524017334, "rewards_train/margins_2": 1.2775442600250244, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -138.76976013183594, "logps_train/policy_1_l": -142.9031524658203, "logps_train/policy_1_w": -96.65409851074219, "logps_train/policy_2_2": -123.14871978759766, "logps_train/policy_2_w": -122.59678649902344, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.754568099975586, "rewards_train/1-l": -1.496956706047058, "rewards_train/1-w": 2.3716020584106445, "rewards_train/2-2": 2.366182327270508, "rewards_train/2-w": 1.2723522186279297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8685587644577026, "rewards_train/margins_1": 0.6170339584350586, "rewards_train/margins_2": 1.0938301086425781, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -100.10885620117188, "logps_train/policy_1_l": -124.93670654296875, "logps_train/policy_1_w": -57.212913513183594, "logps_train/policy_2_2": -66.42582702636719, "logps_train/policy_2_w": -86.47303009033203, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.132082462310791, "rewards_train/1-l": -2.4339053630828857, "rewards_train/1-w": 2.0861306190490723, "rewards_train/2-2": 2.441401958465576, "rewards_train/2-w": 1.0745718479156494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.520035982131958, "rewards_train/margins_1": 0.9540481567382812, "rewards_train/margins_2": 1.3668301105499268, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -153.773681640625, "logps_train/policy_1_l": -184.8050994873047, "logps_train/policy_1_w": -176.05282592773438, "logps_train/policy_2_2": -130.18772888183594, "logps_train/policy_2_w": -228.0088348388672, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 2.428882598876953, "rewards_train/1-l": -2.739884853363037, "rewards_train/1-w": 4.247842311859131, "rewards_train/2-2": 3.3468523025512695, "rewards_train/2-w": 1.8663049936294556, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.987727165222168, "rewards_train/margins_1": 1.8189597129821777, "rewards_train/margins_2": 1.480547308921814, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -84.01863098144531, "logps_train/policy_1_l": -162.41497802734375, "logps_train/policy_1_w": -86.59000396728516, "logps_train/policy_2_2": -63.65711212158203, "logps_train/policy_2_w": -112.6820297241211, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.4223556518554688, "rewards_train/1-l": -2.809661626815796, "rewards_train/1-w": 2.691195011138916, "rewards_train/2-2": 2.09053897857666, "rewards_train/2-w": 1.8607035875320435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.500856637954712, "rewards_train/margins_1": 1.2688393592834473, "rewards_train/margins_2": 0.2298353910446167, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -152.6947021484375, "logps_train/policy_1_l": -118.83735656738281, "logps_train/policy_1_w": -125.75367736816406, "logps_train/policy_2_2": -122.26858520507812, "logps_train/policy_2_w": -150.0580291748047, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.8031859397888184, "rewards_train/1-l": -1.0450631380081177, "rewards_train/1-w": 3.0980701446533203, "rewards_train/2-2": 3.253610134124756, "rewards_train/2-w": 1.7598216533660889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.143133282661438, "rewards_train/margins_1": 1.294884204864502, "rewards_train/margins_2": 1.493788480758667, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -182.74652099609375, "logps_train/policy_1_l": -107.40586853027344, "logps_train/policy_1_w": -104.89859771728516, "logps_train/policy_2_2": -148.2615966796875, "logps_train/policy_2_w": -135.95584106445312, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.7784724235534668, "rewards_train/1-l": -1.5343360900878906, "rewards_train/1-w": 3.7261555194854736, "rewards_train/2-2": 3.355090618133545, "rewards_train/2-w": 2.533323287963867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.260491609573364, "rewards_train/margins_1": 1.9476830959320068, "rewards_train/margins_2": 0.8217673301696777, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -163.6025848388672, "logps_train/policy_1_l": -173.704345703125, "logps_train/policy_1_w": -139.24807739257812, "logps_train/policy_2_2": -131.35128784179688, "logps_train/policy_2_w": -169.42320251464844, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.7334916591644287, "rewards_train/1-l": -2.6989498138427734, "rewards_train/1-w": 2.7337851524353027, "rewards_train/2-2": 3.0609636306762695, "rewards_train/2-w": 1.7256489992141724, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.432734966278076, "rewards_train/margins_1": 1.000293493270874, "rewards_train/margins_2": 1.3353146314620972, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -88.64398193359375, "logps_train/policy_1_l": -91.1089859008789, "logps_train/policy_1_w": -89.47039794921875, "logps_train/policy_2_2": -64.34619140625, "logps_train/policy_2_w": -107.7757797241211, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.9395080208778381, "rewards_train/1-l": -1.6601171493530273, "rewards_train/1-w": 1.7255184650421143, "rewards_train/2-2": 1.7624518871307373, "rewards_train/2-w": 1.0177348852157593, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3856356143951416, "rewards_train/margins_1": 0.7860104441642761, "rewards_train/margins_2": 0.744717001914978, "step": 437 }, { "epoch": 1.31, "learning_rate": 1.4696386574070203e-06, "loss": 0.423, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -107.94981384277344, "logps_train/policy_1_l": -142.25244140625, "logps_train/policy_1_w": -102.34422302246094, "logps_train/policy_2_2": -89.97156524658203, "logps_train/policy_2_w": -118.5551986694336, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.283339262008667, "rewards_train/1-l": -2.7088379859924316, "rewards_train/1-w": 3.147608518600464, "rewards_train/2-2": 2.1908318996429443, "rewards_train/2-w": 2.4620580673217773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.8564465045928955, "rewards_train/margins_1": 1.8642692565917969, "rewards_train/margins_2": -0.271226167678833, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -186.03448486328125, "logps_train/policy_1_l": -204.56527709960938, "logps_train/policy_1_w": -131.55503845214844, "logps_train/policy_2_2": -149.41934204101562, "logps_train/policy_2_w": -175.27122497558594, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.011394739151001, "rewards_train/1-l": -2.650278091430664, "rewards_train/1-w": 3.298403024673462, "rewards_train/2-2": 2.713730573654175, "rewards_train/2-w": 1.7760028839111328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.948681116104126, "rewards_train/margins_1": 2.287008285522461, "rewards_train/margins_2": 0.937727689743042, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -133.955078125, "logps_train/policy_1_l": -146.22377014160156, "logps_train/policy_1_w": -101.68584442138672, "logps_train/policy_2_2": -112.3395004272461, "logps_train/policy_2_w": -134.18301391601562, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.393554925918579, "rewards_train/1-l": -0.998158872127533, "rewards_train/1-w": 3.0685253143310547, "rewards_train/2-2": 2.586069345474243, "rewards_train/2-w": 1.8332618474960327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.066684186458588, "rewards_train/margins_1": 1.6749703884124756, "rewards_train/margins_2": 0.7528074979782104, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -156.44345092773438, "logps_train/policy_1_l": -261.5962219238281, "logps_train/policy_1_w": -153.47193908691406, "logps_train/policy_2_2": -127.51431274414062, "logps_train/policy_2_w": -196.50921630859375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.3833889961242676, "rewards_train/1-l": -2.847121000289917, "rewards_train/1-w": 2.5881574153900146, "rewards_train/2-2": 2.2723965644836426, "rewards_train/2-w": 1.0363821983337402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.435278415679932, "rewards_train/margins_1": 1.204768419265747, "rewards_train/margins_2": 1.2360143661499023, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -153.76583862304688, "logps_train/policy_1_l": -163.3047332763672, "logps_train/policy_1_w": -122.18904876708984, "logps_train/policy_2_2": -120.49346160888672, "logps_train/policy_2_w": -184.98348999023438, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.4546672105789185, "rewards_train/1-l": -1.438228964805603, "rewards_train/1-w": 3.2279701232910156, "rewards_train/2-2": 2.7365915775299072, "rewards_train/2-w": 0.9516501426696777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.666199088096619, "rewards_train/margins_1": 1.7733029127120972, "rewards_train/margins_2": 1.7849414348602295, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -106.87835693359375, "logps_train/policy_1_l": -96.12583923339844, "logps_train/policy_1_w": -84.87505340576172, "logps_train/policy_2_2": -88.3221435546875, "logps_train/policy_2_w": -107.12655639648438, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.0729063749313354, "rewards_train/1-l": -1.6496939659118652, "rewards_train/1-w": 2.029487133026123, "rewards_train/2-2": 1.7863409519195557, "rewards_train/2-w": 0.916836142539978, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6791810989379883, "rewards_train/margins_1": 0.9565807580947876, "rewards_train/margins_2": 0.8695048093795776, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -69.46705627441406, "logps_train/policy_1_l": -73.297119140625, "logps_train/policy_1_w": -75.12753295898438, "logps_train/policy_2_2": -53.07497024536133, "logps_train/policy_2_w": -100.87237548828125, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -56.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.5845439434051514, "rewards_train/1-l": -1.731274127960205, "rewards_train/1-w": 2.934122085571289, "rewards_train/2-2": 2.0237531661987305, "rewards_train/2-w": 1.7752628326416016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.665396213531494, "rewards_train/margins_1": 1.3495781421661377, "rewards_train/margins_2": 0.2484903335571289, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -120.04768371582031, "logps_train/policy_1_l": -161.6665496826172, "logps_train/policy_1_w": -90.8874282836914, "logps_train/policy_2_2": -80.5845718383789, "logps_train/policy_2_w": -131.57513427734375, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.556169033050537, "rewards_train/1-l": -2.1393110752105713, "rewards_train/1-w": 2.6909449100494385, "rewards_train/2-2": 2.7173242568969727, "rewards_train/2-w": 1.179986596107483, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.83025598526001, "rewards_train/margins_1": 1.1347758769989014, "rewards_train/margins_2": 1.5373376607894897, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -98.19041442871094, "logps_train/policy_1_l": -102.69806671142578, "logps_train/policy_1_w": -100.758544921875, "logps_train/policy_2_2": -81.83181762695312, "logps_train/policy_2_w": -123.19915008544922, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.8036143779754639, "rewards_train/1-l": -1.4208325147628784, "rewards_train/1-w": 2.6249263286590576, "rewards_train/2-2": 2.1261935234069824, "rewards_train/2-w": 1.5384836196899414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.045758843421936, "rewards_train/margins_1": 0.8213119506835938, "rewards_train/margins_2": 0.587709903717041, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -133.1605224609375, "logps_train/policy_1_l": -197.94638061523438, "logps_train/policy_1_w": -100.58786010742188, "logps_train/policy_2_2": -108.3875961303711, "logps_train/policy_2_w": -141.94088745117188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.8706673383712769, "rewards_train/1-l": -2.4181745052337646, "rewards_train/1-w": 2.4755895137786865, "rewards_train/2-2": 2.552255630493164, "rewards_train/2-w": 1.2973175048828125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.893764019012451, "rewards_train/margins_1": 0.6049221754074097, "rewards_train/margins_2": 1.2549381256103516, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -89.44937133789062, "logps_train/policy_1_l": -90.612548828125, "logps_train/policy_1_w": -133.4630126953125, "logps_train/policy_2_2": -64.2584228515625, "logps_train/policy_2_w": -169.95462036132812, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.2292816638946533, "rewards_train/1-l": -1.2354731559753418, "rewards_train/1-w": 3.1138556003570557, "rewards_train/2-2": 2.2460334300994873, "rewards_train/2-w": 1.201412320137024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3493287563323975, "rewards_train/margins_1": 1.8845739364624023, "rewards_train/margins_2": 1.0446211099624634, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -180.61849975585938, "logps_train/policy_1_l": -188.8602294921875, "logps_train/policy_1_w": -188.96798706054688, "logps_train/policy_2_2": -141.56256103515625, "logps_train/policy_2_w": -247.696533203125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.6818995475769043, "rewards_train/1-l": -2.5903208255767822, "rewards_train/1-w": 3.4766390323638916, "rewards_train/2-2": 3.32499361038208, "rewards_train/2-w": 0.7553471326828003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.066959857940674, "rewards_train/margins_1": 1.7947394847869873, "rewards_train/margins_2": 2.56964647769928, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -168.72903442382812, "logps_train/policy_1_l": -119.84193420410156, "logps_train/policy_1_w": -89.68082427978516, "logps_train/policy_2_2": -130.55789184570312, "logps_train/policy_2_w": -119.71174621582031, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.5083472728729248, "rewards_train/1-l": -1.3060685396194458, "rewards_train/1-w": 2.5086753368377686, "rewards_train/2-2": 2.919210433959961, "rewards_train/2-w": 1.291520357131958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8147438764572144, "rewards_train/margins_1": 1.0003280639648438, "rewards_train/margins_2": 1.627690076828003, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -179.53463745117188, "logps_train/policy_1_l": -195.9672393798828, "logps_train/policy_1_w": -125.82730102539062, "logps_train/policy_2_2": -157.62399291992188, "logps_train/policy_2_w": -143.02572631835938, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 2.5887231826782227, "rewards_train/1-l": -2.7498481273651123, "rewards_train/1-w": 2.868051052093506, "rewards_train/2-2": 3.614163875579834, "rewards_train/2-w": 2.113054037094116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.617899179458618, "rewards_train/margins_1": 0.2793278694152832, "rewards_train/margins_2": 1.5011098384857178, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -128.82598876953125, "logps_train/policy_1_l": -194.27664184570312, "logps_train/policy_1_w": -131.452880859375, "logps_train/policy_2_2": -91.183349609375, "logps_train/policy_2_w": -175.93836975097656, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.1494317054748535, "rewards_train/1-l": -3.063699722290039, "rewards_train/1-w": 3.5513930320739746, "rewards_train/2-2": 2.4297120571136475, "rewards_train/2-w": 1.814366102218628, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.615092754364014, "rewards_train/margins_1": 2.401961326599121, "rewards_train/margins_2": 0.6153459548950195, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -241.74822998046875, "logps_train/policy_1_l": -216.8211669921875, "logps_train/policy_1_w": -165.02650451660156, "logps_train/policy_2_2": -196.57003784179688, "logps_train/policy_2_w": -209.38302612304688, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.4392396211624146, "rewards_train/1-l": -3.236804962158203, "rewards_train/1-w": 3.6786000728607178, "rewards_train/2-2": 3.5637004375457764, "rewards_train/2-w": 1.9070103168487549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.915405035018921, "rewards_train/margins_1": 2.2393604516983032, "rewards_train/margins_2": 1.6566901206970215, "step": 439 }, { "epoch": 1.32, "learning_rate": 1.447186449484593e-06, "loss": 0.5152, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -121.78646850585938, "logps_train/policy_1_l": -145.00665283203125, "logps_train/policy_1_w": -155.05885314941406, "logps_train/policy_2_2": -99.60601806640625, "logps_train/policy_2_w": -188.36141967773438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.7807281017303467, "rewards_train/1-l": -2.1260557174682617, "rewards_train/1-w": 2.950559377670288, "rewards_train/2-2": 2.46439790725708, "rewards_train/2-w": 1.6787011623382568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.07661509513855, "rewards_train/margins_1": 1.1698312759399414, "rewards_train/margins_2": 0.7856967449188232, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -178.68045043945312, "logps_train/policy_1_l": -225.7852783203125, "logps_train/policy_1_w": -137.57830810546875, "logps_train/policy_2_2": -142.66188049316406, "logps_train/policy_2_w": -172.09616088867188, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.5710171461105347, "rewards_train/1-l": -2.7453246116638184, "rewards_train/1-w": 2.8206849098205566, "rewards_train/2-2": 2.608030319213867, "rewards_train/2-w": 1.4859896898269653, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.566009521484375, "rewards_train/margins_1": 1.249667763710022, "rewards_train/margins_2": 1.1220406293869019, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -122.25494384765625, "logps_train/policy_1_l": -151.18418884277344, "logps_train/policy_1_w": -110.66322326660156, "logps_train/policy_2_2": -89.54258728027344, "logps_train/policy_2_w": -140.92428588867188, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.0159122943878174, "rewards_train/1-l": -2.4951767921447754, "rewards_train/1-w": 3.2071151733398438, "rewards_train/2-2": 2.9738662242889404, "rewards_train/2-w": 1.87632155418396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.702291965484619, "rewards_train/margins_1": 1.1912028789520264, "rewards_train/margins_2": 1.0975446701049805, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -162.09893798828125, "logps_train/policy_1_l": -166.53228759765625, "logps_train/policy_1_w": -90.59791564941406, "logps_train/policy_2_2": -125.72139739990234, "logps_train/policy_2_w": -111.73157501220703, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.476043462753296, "rewards_train/1-l": -2.7622122764587402, "rewards_train/1-w": 2.935520648956299, "rewards_train/2-2": 3.0887975692749023, "rewards_train/2-w": 1.9174675941467285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.697732925415039, "rewards_train/margins_1": 1.459477186203003, "rewards_train/margins_2": 1.1713299751281738, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -168.64114379882812, "logps_train/policy_1_l": -202.17080688476562, "logps_train/policy_1_w": -94.41724395751953, "logps_train/policy_2_2": -133.4219512939453, "logps_train/policy_2_w": -129.49978637695312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.1921347379684448, "rewards_train/1-l": -3.602627754211426, "rewards_train/1-w": 3.3707756996154785, "rewards_train/2-2": 2.835148334503174, "rewards_train/2-w": 1.7304902076721191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.973403453826904, "rewards_train/margins_1": 2.1786409616470337, "rewards_train/margins_2": 1.1046581268310547, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -208.20266723632812, "logps_train/policy_1_l": -169.64410400390625, "logps_train/policy_1_w": -109.27813720703125, "logps_train/policy_2_2": -159.84048461914062, "logps_train/policy_2_w": -149.75384521484375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.528171420097351, "rewards_train/1-l": -1.892730474472046, "rewards_train/1-w": 3.2548038959503174, "rewards_train/2-2": 3.552670955657959, "rewards_train/2-w": 1.926861047744751, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.147534370422363, "rewards_train/margins_1": 1.7266324758529663, "rewards_train/margins_2": 1.625809907913208, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -95.68598175048828, "logps_train/policy_1_l": -149.9368438720703, "logps_train/policy_1_w": -94.42741394042969, "logps_train/policy_2_2": -78.82528686523438, "logps_train/policy_2_w": -117.3648910522461, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.9026914834976196, "rewards_train/1-l": -1.6095060110092163, "rewards_train/1-w": 2.9080398082733154, "rewards_train/2-2": 2.507413148880005, "rewards_train/2-w": 1.8236677646636963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.517545819282532, "rewards_train/margins_1": 1.0053483247756958, "rewards_train/margins_2": 0.6837453842163086, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -108.80860900878906, "logps_train/policy_1_l": -81.12245178222656, "logps_train/policy_1_w": -115.80950927734375, "logps_train/policy_2_2": -86.34514617919922, "logps_train/policy_2_w": -136.3794708251953, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.7042949199676514, "rewards_train/1-l": -0.9952528476715088, "rewards_train/1-w": 2.7198305130004883, "rewards_train/2-2": 2.4326729774475098, "rewards_train/2-w": 2.047210216522217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.715083360671997, "rewards_train/margins_1": 1.015535593032837, "rewards_train/margins_2": 0.38546276092529297, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -236.20404052734375, "logps_train/policy_1_l": -294.10687255859375, "logps_train/policy_1_w": -185.8716583251953, "logps_train/policy_2_2": -190.54165649414062, "logps_train/policy_2_w": -239.7393798828125, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.7499088048934937, "rewards_train/1-l": -3.2685000896453857, "rewards_train/1-w": 3.7331461906433105, "rewards_train/2-2": 3.5598950386047363, "rewards_train/2-w": 1.8291866779327393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.001646280288696, "rewards_train/margins_1": 1.983237385749817, "rewards_train/margins_2": 1.730708360671997, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -91.19088745117188, "logps_train/policy_1_l": -148.63369750976562, "logps_train/policy_1_w": -69.76551055908203, "logps_train/policy_2_2": -73.83485412597656, "logps_train/policy_2_w": -97.54302215576172, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.5535677671432495, "rewards_train/1-l": -2.4324123859405518, "rewards_train/1-w": 2.0629022121429443, "rewards_train/2-2": 2.097764015197754, "rewards_train/2-w": 0.9488220810890198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.495314598083496, "rewards_train/margins_1": 0.5093344449996948, "rewards_train/margins_2": 1.1489419341087341, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -168.07310485839844, "logps_train/policy_1_l": -143.72564697265625, "logps_train/policy_1_w": -122.330078125, "logps_train/policy_2_2": -129.62155151367188, "logps_train/policy_2_w": -167.8683624267578, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.122377395629883, "rewards_train/1-l": -2.1256890296936035, "rewards_train/1-w": 3.3873047828674316, "rewards_train/2-2": 3.8128437995910645, "rewards_train/2-w": 1.624491572380066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.512993812561035, "rewards_train/margins_1": 1.2649273872375488, "rewards_train/margins_2": 2.1883522272109985, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -125.28594207763672, "logps_train/policy_1_l": -198.55567932128906, "logps_train/policy_1_w": -101.3837661743164, "logps_train/policy_2_2": -87.82206726074219, "logps_train/policy_2_w": -141.62481689453125, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.9991402626037598, "rewards_train/1-l": -2.8993186950683594, "rewards_train/1-w": 3.0295920372009277, "rewards_train/2-2": 2.388106346130371, "rewards_train/2-w": 1.553534746170044, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.928910732269287, "rewards_train/margins_1": 2.030451774597168, "rewards_train/margins_2": 0.8345715999603271, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -138.17214965820312, "logps_train/policy_1_l": -134.0696258544922, "logps_train/policy_1_w": -109.98422241210938, "logps_train/policy_2_2": -113.2213363647461, "logps_train/policy_2_w": -144.9435272216797, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.5187230110168457, "rewards_train/1-l": -1.806962490081787, "rewards_train/1-w": 3.0390772819519043, "rewards_train/2-2": 2.6966159343719482, "rewards_train/2-w": 1.6056478023529053, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.846039772033691, "rewards_train/margins_1": 1.5203542709350586, "rewards_train/margins_2": 1.090968132019043, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -149.9839630126953, "logps_train/policy_1_l": -173.61190795898438, "logps_train/policy_1_w": -147.56027221679688, "logps_train/policy_2_2": -107.37440490722656, "logps_train/policy_2_w": -198.6626739501953, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.8180100917816162, "rewards_train/1-l": -1.9572839736938477, "rewards_train/1-w": 4.037723064422607, "rewards_train/2-2": 3.3406851291656494, "rewards_train/2-w": 1.9962321519851685, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.995007038116455, "rewards_train/margins_1": 2.219712972640991, "rewards_train/margins_2": 1.344452977180481, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -237.68563842773438, "logps_train/policy_1_l": -239.22695922851562, "logps_train/policy_1_w": -143.74832153320312, "logps_train/policy_2_2": -188.2808837890625, "logps_train/policy_2_w": -187.13845825195312, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.7814372777938843, "rewards_train/1-l": -1.761757731437683, "rewards_train/1-w": 3.426729202270508, "rewards_train/2-2": 3.4531612396240234, "rewards_train/2-w": 2.081467390060425, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.188486933708191, "rewards_train/margins_1": 2.6452919244766235, "rewards_train/margins_2": 1.3716938495635986, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -119.41085815429688, "logps_train/policy_1_l": -80.407958984375, "logps_train/policy_1_w": -93.50814819335938, "logps_train/policy_2_2": -80.31517028808594, "logps_train/policy_2_w": -140.9805908203125, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.8854773044586182, "rewards_train/1-l": -0.9777098894119263, "rewards_train/1-w": 3.0116848945617676, "rewards_train/2-2": 2.6356706619262695, "rewards_train/2-w": 1.28631591796875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.989394783973694, "rewards_train/margins_1": 2.1262075901031494, "rewards_train/margins_2": 1.3493547439575195, "step": 441 }, { "epoch": 1.32, "learning_rate": 1.4248369943086997e-06, "loss": 0.4057, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -125.45994567871094, "logps_train/policy_1_l": -158.16934204101562, "logps_train/policy_1_w": -100.40737915039062, "logps_train/policy_2_2": -103.62533569335938, "logps_train/policy_2_w": -132.9559326171875, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.308107852935791, "rewards_train/1-l": -2.4153707027435303, "rewards_train/1-w": 2.9022302627563477, "rewards_train/2-2": 2.18473219871521, "rewards_train/2-w": 1.720813274383545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.317600965499878, "rewards_train/margins_1": 1.5941224098205566, "rewards_train/margins_2": 0.46391892433166504, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -80.11599731445312, "logps_train/policy_1_l": -87.74523162841797, "logps_train/policy_1_w": -90.1938705444336, "logps_train/policy_2_2": -54.416412353515625, "logps_train/policy_2_w": -133.21267700195312, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -74.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.2501194477081299, "rewards_train/1-l": -1.3866324424743652, "rewards_train/1-w": 2.825925350189209, "rewards_train/2-2": 2.2646093368530273, "rewards_train/2-w": 1.0115454196929932, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.212557792663574, "rewards_train/margins_1": 1.575805902481079, "rewards_train/margins_2": 1.2530639171600342, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -78.45513153076172, "logps_train/policy_1_l": -170.59982299804688, "logps_train/policy_1_w": -75.40229797363281, "logps_train/policy_2_2": -63.54207992553711, "logps_train/policy_2_w": -98.77824401855469, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": 1.7162057161331177, "rewards_train/1-l": -2.854318141937256, "rewards_train/1-w": 2.686333179473877, "rewards_train/2-2": 2.0551674365997314, "rewards_train/2-w": 1.4753005504608154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.540651321411133, "rewards_train/margins_1": 0.9701274633407593, "rewards_train/margins_2": 0.579866886138916, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -154.82754516601562, "logps_train/policy_1_l": -210.68386840820312, "logps_train/policy_1_w": -159.25173950195312, "logps_train/policy_2_2": -127.4572982788086, "logps_train/policy_2_w": -201.11642456054688, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.414120674133301, "rewards_train/1-l": -2.5592076778411865, "rewards_train/1-w": 3.9693562984466553, "rewards_train/2-2": 3.2214574813842773, "rewards_train/2-w": 2.1133575439453125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.528563976287842, "rewards_train/margins_1": 1.5552356243133545, "rewards_train/margins_2": 1.1080999374389648, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -203.80709838867188, "logps_train/policy_1_l": -277.3743591308594, "logps_train/policy_1_w": -128.785400390625, "logps_train/policy_2_2": -160.62388610839844, "logps_train/policy_2_w": -168.07533264160156, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.6099144220352173, "rewards_train/1-l": -4.101107597351074, "rewards_train/1-w": 3.835521936416626, "rewards_train/2-2": 3.568861246109009, "rewards_train/2-w": 2.3049674034118652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.9366295337677, "rewards_train/margins_1": 2.2256075143814087, "rewards_train/margins_2": 1.2638938426971436, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -139.74002075195312, "logps_train/policy_1_l": -141.59552001953125, "logps_train/policy_1_w": -122.83749389648438, "logps_train/policy_2_2": -106.69770050048828, "logps_train/policy_2_w": -180.2852783203125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.6213107109069824, "rewards_train/1-l": -1.8926588296890259, "rewards_train/1-w": 3.678457260131836, "rewards_train/2-2": 2.5927300453186035, "rewards_train/2-w": 1.08416748046875, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.571116089820862, "rewards_train/margins_1": 2.0571465492248535, "rewards_train/margins_2": 1.5085625648498535, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -166.68963623046875, "logps_train/policy_1_l": -98.98257446289062, "logps_train/policy_1_w": -70.89828491210938, "logps_train/policy_2_2": -133.36688232421875, "logps_train/policy_2_w": -102.4327392578125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.691192865371704, "rewards_train/1-l": -1.7882965803146362, "rewards_train/1-w": 2.7660305500030518, "rewards_train/2-2": 3.0101871490478516, "rewards_train/2-w": 1.3602416515350342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.554327130317688, "rewards_train/margins_1": 1.0748376846313477, "rewards_train/margins_2": 1.6499454975128174, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -193.93052673339844, "logps_train/policy_1_l": -185.40574645996094, "logps_train/policy_1_w": -163.25308227539062, "logps_train/policy_2_2": -149.106689453125, "logps_train/policy_2_w": -218.18470764160156, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": 1.6014784574508667, "rewards_train/1-l": -2.0620594024658203, "rewards_train/1-w": 4.243441581726074, "rewards_train/2-2": 3.4447991847991943, "rewards_train/2-w": 1.4877804517745972, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.3055009841918945, "rewards_train/margins_1": 2.6419631242752075, "rewards_train/margins_2": 1.9570187330245972, "step": 442 }, { "epoch": 1.33, "logps_train/policy_1_2": -123.28948211669922, "logps_train/policy_1_l": -145.12969970703125, "logps_train/policy_1_w": -108.71273040771484, "logps_train/policy_2_2": -99.09134674072266, "logps_train/policy_2_w": -136.18392944335938, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.9046454429626465, "rewards_train/1-l": -2.250568389892578, "rewards_train/1-w": 3.236539363861084, "rewards_train/2-2": 2.929537296295166, "rewards_train/2-w": 1.973793625831604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.487107753753662, "rewards_train/margins_1": 1.3318939208984375, "rewards_train/margins_2": 0.955743670463562, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -155.82388305664062, "logps_train/policy_1_l": -109.05265808105469, "logps_train/policy_1_w": -102.89533996582031, "logps_train/policy_2_2": -124.53697204589844, "logps_train/policy_2_w": -136.72174072265625, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2512050867080688, "rewards_train/1-l": -1.5941331386566162, "rewards_train/1-w": 3.378117084503174, "rewards_train/2-2": 2.547084093093872, "rewards_train/2-w": 1.5832942724227905, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.97225022315979, "rewards_train/margins_1": 2.126911997795105, "rewards_train/margins_2": 0.9637898206710815, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -153.90284729003906, "logps_train/policy_1_l": -118.82456970214844, "logps_train/policy_1_w": -106.25557708740234, "logps_train/policy_2_2": -133.4405975341797, "logps_train/policy_2_w": -132.45437622070312, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.8737776279449463, "rewards_train/1-l": -1.243199110031128, "rewards_train/1-w": 3.093583345413208, "rewards_train/2-2": 2.713752508163452, "rewards_train/2-w": 2.2170629501342773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.336782455444336, "rewards_train/margins_1": 1.2198057174682617, "rewards_train/margins_2": 0.4966895580291748, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -33.88092041015625, "logps_train/policy_1_l": -56.875587463378906, "logps_train/policy_1_w": -43.424659729003906, "logps_train/policy_2_2": -22.793310165405273, "logps_train/policy_2_w": -56.218406677246094, "logps_train/ref_1_2": -41.0, "logps_train/ref_1_l": -42.0, "logps_train/ref_1_w": -55.5, "logps_train/ref_2_2": -34.5, "logps_train/ref_2_w": -63.5, "rewards_train/1-2": 0.6990171670913696, "rewards_train/1-l": -1.5085307359695435, "rewards_train/1-w": 1.2051167488098145, "rewards_train/2-2": 1.170278549194336, "rewards_train/2-w": 0.7114111185073853, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.713647484779358, "rewards_train/margins_1": 0.5060995817184448, "rewards_train/margins_2": 0.4588674306869507, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -183.90667724609375, "logps_train/policy_1_l": -196.45286560058594, "logps_train/policy_1_w": -109.15447998046875, "logps_train/policy_2_2": -146.76995849609375, "logps_train/policy_2_w": -143.4376678466797, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.9235901832580566, "rewards_train/1-l": -2.2054433822631836, "rewards_train/1-w": 3.444122314453125, "rewards_train/2-2": 3.508746862411499, "rewards_train/2-w": 2.0279123783111572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.649565696716309, "rewards_train/margins_1": 1.5205321311950684, "rewards_train/margins_2": 1.4808344841003418, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -138.81227111816406, "logps_train/policy_1_l": -142.8377227783203, "logps_train/policy_1_w": -100.25991821289062, "logps_train/policy_2_2": -102.45048522949219, "logps_train/policy_2_w": -142.43255615234375, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.9546123743057251, "rewards_train/1-l": -2.3411943912506104, "rewards_train/1-w": 2.5251805782318115, "rewards_train/2-2": 2.618525981903076, "rewards_train/2-w": 0.6782296895980835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.866374969482422, "rewards_train/margins_1": 1.5705682039260864, "rewards_train/margins_2": 1.9402962923049927, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -192.38119506835938, "logps_train/policy_1_l": -171.7721710205078, "logps_train/policy_1_w": -143.92918395996094, "logps_train/policy_2_2": -155.26898193359375, "logps_train/policy_2_w": -177.8802490234375, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.583754539489746, "rewards_train/1-l": -1.6985068321228027, "rewards_train/1-w": 3.2051286697387695, "rewards_train/2-2": 4.344976425170898, "rewards_train/2-w": 1.8045541048049927, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.903635501861572, "rewards_train/margins_1": 0.6213741302490234, "rewards_train/margins_2": 2.5404223203659058, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -143.27638244628906, "logps_train/policy_1_l": -134.57240295410156, "logps_train/policy_1_w": -120.31463623046875, "logps_train/policy_2_2": -112.3115005493164, "logps_train/policy_2_w": -163.3564453125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.191111445426941, "rewards_train/1-l": -1.2583149671554565, "rewards_train/1-w": 4.299786567687988, "rewards_train/2-2": 2.4510769844055176, "rewards_train/2-w": 2.726855516433716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.558101534843445, "rewards_train/margins_1": 3.1086751222610474, "rewards_train/margins_2": -0.27577853202819824, "step": 443 }, { "epoch": 1.33, "learning_rate": 1.402592473146766e-06, "loss": 0.4643, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -203.72247314453125, "logps_train/policy_1_l": -175.50390625, "logps_train/policy_1_w": -177.93936157226562, "logps_train/policy_2_2": -157.73715209960938, "logps_train/policy_2_w": -227.0987548828125, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 1.2330269813537598, "rewards_train/1-l": -1.673827052116394, "rewards_train/1-w": 4.362314224243164, "rewards_train/2-2": 3.3990378379821777, "rewards_train/2-w": 2.3745007514953613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.036141276359558, "rewards_train/margins_1": 3.1292872428894043, "rewards_train/margins_2": 1.0245370864868164, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -119.4066162109375, "logps_train/policy_1_l": -131.67898559570312, "logps_train/policy_1_w": -79.42790985107422, "logps_train/policy_2_2": -82.54427337646484, "logps_train/policy_2_w": -116.20901489257812, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.062463641166687, "rewards_train/1-l": -2.0838770866394043, "rewards_train/1-w": 2.357795476913452, "rewards_train/2-2": 2.148698091506958, "rewards_train/2-w": 1.0870087146759033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4416725635528564, "rewards_train/margins_1": 1.2953318357467651, "rewards_train/margins_2": 1.0616893768310547, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -187.95535278320312, "logps_train/policy_1_l": -161.89852905273438, "logps_train/policy_1_w": -103.14718627929688, "logps_train/policy_2_2": -136.95826721191406, "logps_train/policy_2_w": -157.96844482421875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.8450889587402344, "rewards_train/1-l": -2.3195395469665527, "rewards_train/1-w": 3.7868432998657227, "rewards_train/2-2": 3.1463608741760254, "rewards_train/2-w": 1.4406545162200928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.106382846832275, "rewards_train/margins_1": 2.9417543411254883, "rewards_train/margins_2": 1.7057063579559326, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -179.99537658691406, "logps_train/policy_1_l": -181.831787109375, "logps_train/policy_1_w": -145.41744995117188, "logps_train/policy_2_2": -140.69427490234375, "logps_train/policy_2_w": -190.5115966796875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8481186628341675, "rewards_train/1-l": -1.991382122039795, "rewards_train/1-w": 3.6098175048828125, "rewards_train/2-2": 3.4719784259796143, "rewards_train/2-w": 1.761340618133545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.601199626922607, "rewards_train/margins_1": 1.761698842048645, "rewards_train/margins_2": 1.7106378078460693, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -99.40213775634766, "logps_train/policy_1_l": -125.48756408691406, "logps_train/policy_1_w": -113.34115600585938, "logps_train/policy_2_2": -72.91547393798828, "logps_train/policy_2_w": -142.11618041992188, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.6687705516815186, "rewards_train/1-l": -1.1818616390228271, "rewards_train/1-w": 2.9807286262512207, "rewards_train/2-2": 2.528374671936035, "rewards_train/2-w": 1.50010085105896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.162590265274048, "rewards_train/margins_1": 1.3119580745697021, "rewards_train/margins_2": 1.0282738208770752, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -202.58877563476562, "logps_train/policy_1_l": -202.42311096191406, "logps_train/policy_1_w": -145.2899627685547, "logps_train/policy_2_2": -156.63323974609375, "logps_train/policy_2_w": -195.48013305664062, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.4036234617233276, "rewards_train/1-l": -2.2016868591308594, "rewards_train/1-w": 2.841315746307373, "rewards_train/2-2": 3.480426073074341, "rewards_train/2-w": 0.6754236817359924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.043002605438232, "rewards_train/margins_1": 1.4376922845840454, "rewards_train/margins_2": 2.8050023913383484, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -153.67694091796875, "logps_train/policy_1_l": -256.931396484375, "logps_train/policy_1_w": -196.54232788085938, "logps_train/policy_2_2": -118.77802276611328, "logps_train/policy_2_w": -270.39764404296875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 2.422931671142578, "rewards_train/1-l": -2.9286839962005615, "rewards_train/1-w": 4.503579616546631, "rewards_train/2-2": 3.128448009490967, "rewards_train/2-w": 0.857111394405365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.432263612747192, "rewards_train/margins_1": 2.0806479454040527, "rewards_train/margins_2": 2.271336615085602, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -116.35938262939453, "logps_train/policy_1_l": -155.01742553710938, "logps_train/policy_1_w": -94.908447265625, "logps_train/policy_2_2": -88.33201599121094, "logps_train/policy_2_w": -134.16561889648438, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.5933589935302734, "rewards_train/1-l": -1.9173438549041748, "rewards_train/1-w": 3.304492473602295, "rewards_train/2-2": 2.506251811981201, "rewards_train/2-w": 1.8949376344680786, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.22183632850647, "rewards_train/margins_1": 1.7111334800720215, "rewards_train/margins_2": 0.6113141775131226, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -113.81144714355469, "logps_train/policy_1_l": -109.70150756835938, "logps_train/policy_1_w": -110.68681335449219, "logps_train/policy_2_2": -84.54195404052734, "logps_train/policy_2_w": -140.83078002929688, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.1852614879608154, "rewards_train/1-l": -1.6674163341522217, "rewards_train/1-w": 2.4016313552856445, "rewards_train/2-2": 2.1997110843658447, "rewards_train/2-w": 1.0200468301773071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.069047689437866, "rewards_train/margins_1": 1.216369867324829, "rewards_train/margins_2": 1.1796642541885376, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -181.01654052734375, "logps_train/policy_1_l": -158.53111267089844, "logps_train/policy_1_w": -112.47738647460938, "logps_train/policy_2_2": -140.694091796875, "logps_train/policy_2_w": -157.24342346191406, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.0233455896377563, "rewards_train/1-l": -1.6204946041107178, "rewards_train/1-w": 2.756948947906494, "rewards_train/2-2": 2.8626229763031006, "rewards_train/2-w": 1.0615949630737305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.377443552017212, "rewards_train/margins_1": 1.7336033582687378, "rewards_train/margins_2": 1.8010280132293701, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -131.86671447753906, "logps_train/policy_1_l": -107.2255859375, "logps_train/policy_1_w": -100.29446411132812, "logps_train/policy_2_2": -101.87966918945312, "logps_train/policy_2_w": -127.8543701171875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.6555159091949463, "rewards_train/1-l": -1.4131834506988525, "rewards_train/1-w": 2.5877413749694824, "rewards_train/2-2": 2.783907413482666, "rewards_train/2-w": 1.5114376544952393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.000924825668335, "rewards_train/margins_1": 0.9322254657745361, "rewards_train/margins_2": 1.2724697589874268, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -177.0460662841797, "logps_train/policy_1_l": -101.00079345703125, "logps_train/policy_1_w": -92.83277893066406, "logps_train/policy_2_2": -142.3429718017578, "logps_train/policy_2_w": -129.84678649902344, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.0797683000564575, "rewards_train/1-l": -2.15879487991333, "rewards_train/1-w": 3.0051016807556152, "rewards_train/2-2": 2.5141396522521973, "rewards_train/2-w": 1.580359935760498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.163896560668945, "rewards_train/margins_1": 1.9253333806991577, "rewards_train/margins_2": 0.9337797164916992, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -139.72183227539062, "logps_train/policy_1_l": -133.94183349609375, "logps_train/policy_1_w": -93.5543212890625, "logps_train/policy_2_2": -97.12464904785156, "logps_train/policy_2_w": -134.71165466308594, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.2481305599212646, "rewards_train/1-l": -1.8269962072372437, "rewards_train/1-w": 3.552380084991455, "rewards_train/2-2": 2.6547229290008545, "rewards_train/2-w": 1.8772718906402588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.379376292228699, "rewards_train/margins_1": 2.3042495250701904, "rewards_train/margins_2": 0.7774510383605957, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -154.26699829101562, "logps_train/policy_1_l": -141.6597900390625, "logps_train/policy_1_w": -106.98995208740234, "logps_train/policy_2_2": -116.3813705444336, "logps_train/policy_2_w": -140.76971435546875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.6873633861541748, "rewards_train/1-l": -1.666882038116455, "rewards_train/1-w": 2.7133827209472656, "rewards_train/2-2": 2.921238422393799, "rewards_train/2-w": 1.9132624864578247, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.380264759063721, "rewards_train/margins_1": 1.0260193347930908, "rewards_train/margins_2": 1.0079759359359741, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -116.4677734375, "logps_train/policy_1_l": -98.26309967041016, "logps_train/policy_1_w": -78.0799560546875, "logps_train/policy_2_2": -87.842529296875, "logps_train/policy_2_w": -96.38201904296875, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.1579103469848633, "rewards_train/1-l": -1.6889078617095947, "rewards_train/1-w": 2.2357540130615234, "rewards_train/2-2": 2.3094968795776367, "rewards_train/2-w": 1.4797669649124146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.924661874771118, "rewards_train/margins_1": 1.0778436660766602, "rewards_train/margins_2": 0.8297299146652222, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -136.50921630859375, "logps_train/policy_1_l": -142.09280395507812, "logps_train/policy_1_w": -116.03533172607422, "logps_train/policy_2_2": -112.34765625, "logps_train/policy_2_w": -148.39488220214844, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.1811094284057617, "rewards_train/1-l": -1.810843825340271, "rewards_train/1-w": 2.819904327392578, "rewards_train/2-2": 2.4027340412139893, "rewards_train/2-w": 1.4308240413665771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.630748152732849, "rewards_train/margins_1": 1.6387948989868164, "rewards_train/margins_2": 0.9719099998474121, "step": 445 }, { "epoch": 1.34, "learning_rate": 1.3804550570248431e-06, "loss": 0.4122, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -217.7612762451172, "logps_train/policy_1_l": -301.5529479980469, "logps_train/policy_1_w": -145.32565307617188, "logps_train/policy_2_2": -166.5148162841797, "logps_train/policy_2_w": -196.88912963867188, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -268.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.278560161590576, "rewards_train/1-l": -3.368576765060425, "rewards_train/1-w": 4.398684501647949, "rewards_train/2-2": 4.412580490112305, "rewards_train/2-w": 2.54233717918396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.767261266708374, "rewards_train/margins_1": 2.120124340057373, "rewards_train/margins_2": 1.8702433109283447, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -104.13433837890625, "logps_train/policy_1_l": -239.7940673828125, "logps_train/policy_1_w": -113.23043823242188, "logps_train/policy_2_2": -78.3599853515625, "logps_train/policy_2_w": -155.7082977294922, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.796722173690796, "rewards_train/1-l": -2.99288272857666, "rewards_train/1-w": 3.3410189151763916, "rewards_train/2-2": 2.39251708984375, "rewards_train/2-w": 1.3112019300460815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.333901643753052, "rewards_train/margins_1": 1.5442967414855957, "rewards_train/margins_2": 1.0813151597976685, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -181.72903442382812, "logps_train/policy_1_l": -101.90017700195312, "logps_train/policy_1_w": -99.54417419433594, "logps_train/policy_2_2": -135.26724243164062, "logps_train/policy_2_w": -130.3631134033203, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.2442848682403564, "rewards_train/1-l": -1.90134596824646, "rewards_train/1-w": 3.2291758060455322, "rewards_train/2-2": 3.7103848457336426, "rewards_train/2-w": 1.8980638980865479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.130521774291992, "rewards_train/margins_1": 1.9848909378051758, "rewards_train/margins_2": 1.8123209476470947, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -136.40634155273438, "logps_train/policy_1_l": -103.7427749633789, "logps_train/policy_1_w": -117.83467864990234, "logps_train/policy_2_2": -90.86222839355469, "logps_train/policy_2_w": -170.7390899658203, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.1085841655731201, "rewards_train/1-l": -1.3787691593170166, "rewards_train/1-w": 3.519657850265503, "rewards_train/2-2": 2.873152017593384, "rewards_train/2-w": 1.154215693473816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.8984270095825195, "rewards_train/margins_1": 2.411073684692383, "rewards_train/margins_2": 1.7189363241195679, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -186.17706298828125, "logps_train/policy_1_l": -146.63172912597656, "logps_train/policy_1_w": -131.25831604003906, "logps_train/policy_2_2": -139.68145751953125, "logps_train/policy_2_w": -164.41360473632812, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.1885428428649902, "rewards_train/1-l": -2.031336545944214, "rewards_train/1-w": 3.6005353927612305, "rewards_train/2-2": 3.9396674633026123, "rewards_train/2-w": 2.455514430999756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.631871938705444, "rewards_train/margins_1": 1.4119925498962402, "rewards_train/margins_2": 1.4841530323028564, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -210.1314697265625, "logps_train/policy_1_l": -249.917724609375, "logps_train/policy_1_w": -149.89901733398438, "logps_train/policy_2_2": -167.2608642578125, "logps_train/policy_2_w": -181.87991333007812, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 2.7493534088134766, "rewards_train/1-l": -2.982006549835205, "rewards_train/1-w": 3.586855173110962, "rewards_train/2-2": 4.173913955688477, "rewards_train/2-w": 2.3190393447875977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.568861722946167, "rewards_train/margins_1": 0.8375017642974854, "rewards_train/margins_2": 1.854874610900879, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -87.42993927001953, "logps_train/policy_1_l": -120.57347106933594, "logps_train/policy_1_w": -59.246192932128906, "logps_train/policy_2_2": -62.94783020019531, "logps_train/policy_2_w": -85.19876861572266, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.0730215311050415, "rewards_train/1-l": -2.8072495460510254, "rewards_train/1-w": 2.1367084980010986, "rewards_train/2-2": 2.2024827003479004, "rewards_train/2-w": 1.3359830379486084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.943958044052124, "rewards_train/margins_1": 1.0636869668960571, "rewards_train/margins_2": 0.866499662399292, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -185.4500732421875, "logps_train/policy_1_l": -182.838623046875, "logps_train/policy_1_w": -96.25656127929688, "logps_train/policy_2_2": -147.53150939941406, "logps_train/policy_2_w": -128.98025512695312, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.4188594818115234, "rewards_train/1-l": -2.4076919555664062, "rewards_train/1-w": 3.1751246452331543, "rewards_train/2-2": 2.8706777095794678, "rewards_train/2-w": 1.8953330516815186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.5828166007995605, "rewards_train/margins_1": 1.7562651634216309, "rewards_train/margins_2": 0.9753446578979492, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -189.93600463867188, "logps_train/policy_1_l": -286.20245361328125, "logps_train/policy_1_w": -119.31155395507812, "logps_train/policy_2_2": -144.06842041015625, "logps_train/policy_2_w": -161.77313232421875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.7970236539840698, "rewards_train/1-l": -3.9206342697143555, "rewards_train/1-w": 2.9907188415527344, "rewards_train/2-2": 3.6158154010772705, "rewards_train/2-w": 1.4758121967315674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.91135311126709, "rewards_train/margins_1": 1.1936951875686646, "rewards_train/margins_2": 2.140003204345703, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -226.4786376953125, "logps_train/policy_1_l": -185.1599578857422, "logps_train/policy_1_w": -136.25161743164062, "logps_train/policy_2_2": -167.29324340820312, "logps_train/policy_2_w": -185.59642028808594, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.4874378442764282, "rewards_train/1-l": -2.016629219055176, "rewards_train/1-w": 3.7601406574249268, "rewards_train/2-2": 3.8379597663879395, "rewards_train/2-w": 2.0888924598693848, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.7767698764801025, "rewards_train/margins_1": 2.2727028131484985, "rewards_train/margins_2": 1.7490673065185547, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -114.17710876464844, "logps_train/policy_1_l": -103.41630554199219, "logps_train/policy_1_w": -99.58140563964844, "logps_train/policy_2_2": -85.94268035888672, "logps_train/policy_2_w": -129.47727966308594, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.9885387420654297, "rewards_train/1-l": -1.4795819520950317, "rewards_train/1-w": 2.1002955436706543, "rewards_train/2-2": 2.071357488632202, "rewards_train/2-w": 1.2173652648925781, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.579877495765686, "rewards_train/margins_1": 1.1117568016052246, "rewards_train/margins_2": 0.853992223739624, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -166.18824768066406, "logps_train/policy_1_l": -202.89743041992188, "logps_train/policy_1_w": -155.43238830566406, "logps_train/policy_2_2": -130.27865600585938, "logps_train/policy_2_w": -203.22593688964844, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.3811745643615723, "rewards_train/1-l": -3.113962411880493, "rewards_train/1-w": 3.444261074066162, "rewards_train/2-2": 2.6748697757720947, "rewards_train/2-w": 1.7086560726165771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.558223485946655, "rewards_train/margins_1": 2.06308650970459, "rewards_train/margins_2": 0.9662137031555176, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -150.09437561035156, "logps_train/policy_1_l": -189.29054260253906, "logps_train/policy_1_w": -143.22117614746094, "logps_train/policy_2_2": -135.32232666015625, "logps_train/policy_2_w": -166.75094604492188, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.8907856941223145, "rewards_train/1-l": -2.1615731716156006, "rewards_train/1-w": 2.9808120727539062, "rewards_train/2-2": -0.2823307514190674, "rewards_train/2-w": 1.6600608825683594, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.142385244369507, "rewards_train/margins_1": 3.8715977668762207, "rewards_train/margins_2": -1.9423916339874268, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -194.903076171875, "logps_train/policy_1_l": -202.47540283203125, "logps_train/policy_1_w": -156.79739379882812, "logps_train/policy_2_2": -151.82504272460938, "logps_train/policy_2_w": -212.3084259033203, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.08547306060791, "rewards_train/1-l": -2.664729118347168, "rewards_train/1-w": 4.114010810852051, "rewards_train/2-2": 3.646791696548462, "rewards_train/2-w": 1.9609532356262207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.778739929199219, "rewards_train/margins_1": 2.0285377502441406, "rewards_train/margins_2": 1.6858384609222412, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -221.1926727294922, "logps_train/policy_1_l": -207.87205505371094, "logps_train/policy_1_w": -98.98460388183594, "logps_train/policy_2_2": -164.912841796875, "logps_train/policy_2_w": -132.4642333984375, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.0338573455810547, "rewards_train/1-l": -3.3001933097839355, "rewards_train/1-w": 2.8995871543884277, "rewards_train/2-2": 4.275903701782227, "rewards_train/2-w": 1.5356086492538452, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.199780464172363, "rewards_train/margins_1": 0.865729808807373, "rewards_train/margins_2": 2.7402950525283813, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -161.46817016601562, "logps_train/policy_1_l": -109.17440032958984, "logps_train/policy_1_w": -88.987060546875, "logps_train/policy_2_2": -122.79694366455078, "logps_train/policy_2_w": -131.67608642578125, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.6684174537658691, "rewards_train/1-l": -1.0885334014892578, "rewards_train/1-w": 2.7173094749450684, "rewards_train/2-2": 2.5089776515960693, "rewards_train/2-w": 1.078777551651001, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.805842876434326, "rewards_train/margins_1": 2.048892021179199, "rewards_train/margins_2": 1.4302000999450684, "step": 447 }, { "epoch": 1.34, "learning_rate": 1.3584269065157175e-06, "loss": 0.6, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -103.27593994140625, "logps_train/policy_1_l": -82.23417663574219, "logps_train/policy_1_w": -65.42263793945312, "logps_train/policy_2_2": -77.93354797363281, "logps_train/policy_2_w": -84.63072967529297, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.5661556720733643, "rewards_train/1-l": -1.5755659341812134, "rewards_train/1-w": 1.6690642833709717, "rewards_train/2-2": 1.814457893371582, "rewards_train/2-w": 0.7392708659172058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.244630217552185, "rewards_train/margins_1": 1.1029086112976074, "rewards_train/margins_2": 1.0751870274543762, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -177.6077880859375, "logps_train/policy_1_l": -162.72108459472656, "logps_train/policy_1_w": -121.11226654052734, "logps_train/policy_2_2": -134.11355590820312, "logps_train/policy_2_w": -157.43148803710938, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.168909788131714, "rewards_train/1-l": -1.9236705303192139, "rewards_train/1-w": 2.815335750579834, "rewards_train/2-2": 4.010519981384277, "rewards_train/2-w": 1.3662264347076416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.739006280899048, "rewards_train/margins_1": 0.6464259624481201, "rewards_train/margins_2": 2.6442935466766357, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -84.70784759521484, "logps_train/policy_1_l": -147.9743194580078, "logps_train/policy_1_w": -140.68603515625, "logps_train/policy_2_2": -64.36128234863281, "logps_train/policy_2_w": -190.35093688964844, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.4198400974273682, "rewards_train/1-l": -2.191572904586792, "rewards_train/1-w": 3.3313958644866943, "rewards_train/2-2": 2.109184503555298, "rewards_train/2-w": 1.1586575508117676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.522968769073486, "rewards_train/margins_1": 1.9115557670593262, "rewards_train/margins_2": 0.9505269527435303, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -96.63905334472656, "logps_train/policy_1_l": -115.29679870605469, "logps_train/policy_1_w": -71.76220703125, "logps_train/policy_2_2": -72.9881591796875, "logps_train/policy_2_w": -106.19480895996094, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.6866322755813599, "rewards_train/1-l": -1.4669848680496216, "rewards_train/1-w": 1.8124513626098633, "rewards_train/2-2": 1.4135868549346924, "rewards_train/2-w": 0.45669081807136536, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.279436230659485, "rewards_train/margins_1": 1.1258190870285034, "rewards_train/margins_2": 0.956896036863327, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -168.4986572265625, "logps_train/policy_1_l": -183.368408203125, "logps_train/policy_1_w": -106.62431335449219, "logps_train/policy_2_2": -138.45655822753906, "logps_train/policy_2_w": -133.32803344726562, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.442322015762329, "rewards_train/1-l": -2.847388744354248, "rewards_train/1-w": 2.8000693321228027, "rewards_train/2-2": 2.68715763092041, "rewards_train/2-w": 1.8734467029571533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.647458076477051, "rewards_train/margins_1": 1.3577473163604736, "rewards_train/margins_2": 0.8137109279632568, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -106.22586059570312, "logps_train/policy_1_l": -137.11000061035156, "logps_train/policy_1_w": -86.34612274169922, "logps_train/policy_2_2": -78.28096771240234, "logps_train/policy_2_w": -118.60552978515625, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.9344451427459717, "rewards_train/1-l": -2.0158824920654297, "rewards_train/1-w": 2.499762535095215, "rewards_train/2-2": 1.7012003660202026, "rewards_train/2-w": 1.4601500034332275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.5156450271606445, "rewards_train/margins_1": 1.5653173923492432, "rewards_train/margins_2": 0.2410503625869751, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -112.1390151977539, "logps_train/policy_1_l": -181.37432861328125, "logps_train/policy_1_w": -101.38348388671875, "logps_train/policy_2_2": -96.21485137939453, "logps_train/policy_2_w": -122.8658447265625, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.6610984802246094, "rewards_train/1-l": -3.6772756576538086, "rewards_train/1-w": 3.6460275650024414, "rewards_train/2-2": 3.1863269805908203, "rewards_train/2-w": 2.552478313446045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.32330322265625, "rewards_train/margins_1": 0.984929084777832, "rewards_train/margins_2": 0.6338486671447754, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -219.09503173828125, "logps_train/policy_1_l": -230.9911346435547, "logps_train/policy_1_w": -154.50592041015625, "logps_train/policy_2_2": -158.0786895751953, "logps_train/policy_2_w": -223.63792419433594, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": -0.03216052055358887, "rewards_train/1-l": -1.8299720287322998, "rewards_train/1-w": 3.6962833404541016, "rewards_train/2-2": 2.646820068359375, "rewards_train/2-w": 1.1432377099990845, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.526255369186401, "rewards_train/margins_1": 3.7284438610076904, "rewards_train/margins_2": 1.5035823583602905, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -127.63215637207031, "logps_train/policy_1_l": -169.43157958984375, "logps_train/policy_1_w": -94.84732818603516, "logps_train/policy_2_2": -94.67301940917969, "logps_train/policy_2_w": -124.99630737304688, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.5797529220581055, "rewards_train/1-l": -2.6992132663726807, "rewards_train/1-w": 3.0777673721313477, "rewards_train/2-2": 2.6241040229797363, "rewards_train/2-w": 2.220681667327881, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.776980638504028, "rewards_train/margins_1": 1.4980144500732422, "rewards_train/margins_2": 0.40342235565185547, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -106.75051879882812, "logps_train/policy_1_l": -72.97917175292969, "logps_train/policy_1_w": -78.12429809570312, "logps_train/policy_2_2": -79.29180908203125, "logps_train/policy_2_w": -107.60692596435547, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.1186976432800293, "rewards_train/1-l": -1.1322921514511108, "rewards_train/1-w": 3.1563198566436768, "rewards_train/2-2": 1.9505064487457275, "rewards_train/2-w": 1.6268072128295898, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.288612008094788, "rewards_train/margins_1": 2.0376222133636475, "rewards_train/margins_2": 0.3236992359161377, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -98.45722961425781, "logps_train/policy_1_l": -154.458984375, "logps_train/policy_1_w": -100.10560607910156, "logps_train/policy_2_2": -72.9297866821289, "logps_train/policy_2_w": -140.3106689453125, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.5641403198242188, "rewards_train/1-l": -1.0579091310501099, "rewards_train/1-w": 2.547348737716675, "rewards_train/2-2": 2.2620019912719727, "rewards_train/2-w": 0.6630741357803345, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6052578687667847, "rewards_train/margins_1": 0.983208417892456, "rewards_train/margins_2": 1.5989278554916382, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -23.276079177856445, "logps_train/policy_1_l": -17.6860408782959, "logps_train/policy_1_w": -23.922138214111328, "logps_train/policy_2_2": -11.069123268127441, "logps_train/policy_2_w": -43.40970230102539, "logps_train/ref_1_2": -24.75, "logps_train/ref_1_l": -10.25, "logps_train/ref_1_w": -40.0, "logps_train/ref_2_2": -16.625, "logps_train/ref_2_w": -48.0, "rewards_train/1-2": 0.1399700939655304, "rewards_train/1-l": -0.745898962020874, "rewards_train/1-w": 1.6124736070632935, "rewards_train/2-2": 0.5575408339500427, "rewards_train/2-w": 0.46996715664863586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 2.3583725690841675, "rewards_train/margins_1": 1.472503513097763, "rewards_train/margins_2": 0.08757367730140686, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -144.9686279296875, "logps_train/policy_1_l": -104.2376480102539, "logps_train/policy_1_w": -46.425628662109375, "logps_train/policy_2_2": -99.52230072021484, "logps_train/policy_2_w": -71.52435302734375, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.32423141598701477, "rewards_train/1-l": -2.652866840362549, "rewards_train/1-w": 2.229311943054199, "rewards_train/2-2": 2.485269784927368, "rewards_train/2-w": 1.4819400310516357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.882178783416748, "rewards_train/margins_1": 1.9050805270671844, "rewards_train/margins_2": 1.0033297538757324, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -97.42418670654297, "logps_train/policy_1_l": -126.7552490234375, "logps_train/policy_1_w": -81.75129699707031, "logps_train/policy_2_2": -73.8139419555664, "logps_train/policy_2_w": -104.35443878173828, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.2251592874526978, "rewards_train/1-l": -1.8434935808181763, "rewards_train/1-w": 2.2740893363952637, "rewards_train/2-2": 2.26977801322937, "rewards_train/2-w": 1.5452196598052979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.11758291721344, "rewards_train/margins_1": 1.048930048942566, "rewards_train/margins_2": 0.7245583534240723, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -139.99215698242188, "logps_train/policy_1_l": -181.4327850341797, "logps_train/policy_1_w": -116.32946014404297, "logps_train/policy_2_2": -106.53923034667969, "logps_train/policy_2_w": -165.4354248046875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5445342063903809, "rewards_train/1-l": -2.8157386779785156, "rewards_train/1-w": 3.1826794147491455, "rewards_train/2-2": 2.9398269653320312, "rewards_train/2-w": 1.2408337593078613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.998418092727661, "rewards_train/margins_1": 1.6381452083587646, "rewards_train/margins_2": 1.69899320602417, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -114.90599060058594, "logps_train/policy_1_l": -212.76797485351562, "logps_train/policy_1_w": -146.69070434570312, "logps_train/policy_2_2": -97.87489318847656, "logps_train/policy_2_w": -179.84408569335938, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.9767837524414062, "rewards_train/1-l": -2.7742576599121094, "rewards_train/1-w": 2.9500699043273926, "rewards_train/2-2": 2.2148542404174805, "rewards_train/2-w": 1.4394190311431885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.724327564239502, "rewards_train/margins_1": 0.9732861518859863, "rewards_train/margins_2": 0.775435209274292, "step": 449 }, { "epoch": 1.35, "learning_rate": 1.3365101715280473e-06, "loss": 0.5391, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -111.76358032226562, "logps_train/policy_1_l": -121.05449676513672, "logps_train/policy_1_w": -106.19378662109375, "logps_train/policy_2_2": -84.4931640625, "logps_train/policy_2_w": -138.21066284179688, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.9892663955688477, "rewards_train/1-l": -1.9609184265136719, "rewards_train/1-w": 2.851714611053467, "rewards_train/2-2": 2.337597370147705, "rewards_train/2-w": 1.661942720413208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.812633037567139, "rewards_train/margins_1": 1.8624482154846191, "rewards_train/margins_2": 0.6756546497344971, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -110.33842468261719, "logps_train/policy_1_l": -94.56925964355469, "logps_train/policy_1_w": -93.20892333984375, "logps_train/policy_2_2": -79.26443481445312, "logps_train/policy_2_w": -128.05502319335938, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.3200637102127075, "rewards_train/1-l": -1.8067307472229004, "rewards_train/1-w": 2.628326892852783, "rewards_train/2-2": 2.565744400024414, "rewards_train/2-w": 0.8890295028686523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.435057640075684, "rewards_train/margins_1": 1.3082631826400757, "rewards_train/margins_2": 1.6767148971557617, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -144.2040252685547, "logps_train/policy_1_l": -118.82759094238281, "logps_train/policy_1_w": -92.80455017089844, "logps_train/policy_2_2": -115.5305404663086, "logps_train/policy_2_w": -114.6103286743164, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.8686602115631104, "rewards_train/1-l": -1.3710404634475708, "rewards_train/1-w": 2.818178176879883, "rewards_train/2-2": 2.718820571899414, "rewards_train/2-w": 1.8290060758590698, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.189218640327454, "rewards_train/margins_1": 0.9495179653167725, "rewards_train/margins_2": 0.8898144960403442, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -139.88833618164062, "logps_train/policy_1_l": -140.6175537109375, "logps_train/policy_1_w": -86.67371368408203, "logps_train/policy_2_2": -109.10527038574219, "logps_train/policy_2_w": -118.14551544189453, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.3517913818359375, "rewards_train/1-l": -2.810192346572876, "rewards_train/1-w": 2.7263784408569336, "rewards_train/2-2": 2.9769725799560547, "rewards_train/2-w": 1.318260669708252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.53657078742981, "rewards_train/margins_1": 1.374587059020996, "rewards_train/margins_2": 1.6587119102478027, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -116.64949035644531, "logps_train/policy_1_l": -214.37449645996094, "logps_train/policy_1_w": -64.18938446044922, "logps_train/policy_2_2": -84.18592834472656, "logps_train/policy_2_w": -90.52750396728516, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.1334880590438843, "rewards_train/1-l": -4.139695644378662, "rewards_train/1-w": 2.081843376159668, "rewards_train/2-2": 2.3564069271087646, "rewards_train/2-w": 1.1183433532714844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.22153902053833, "rewards_train/margins_1": 0.9483553171157837, "rewards_train/margins_2": 1.2380635738372803, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -224.22653198242188, "logps_train/policy_1_l": -235.95664978027344, "logps_train/policy_1_w": -170.50511169433594, "logps_train/policy_2_2": -160.25814819335938, "logps_train/policy_2_w": -249.17982482910156, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 0.8304727077484131, "rewards_train/1-l": -3.550351858139038, "rewards_train/1-w": 3.946363925933838, "rewards_train/2-2": 3.436685800552368, "rewards_train/2-w": 0.7663930654525757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.496715784072876, "rewards_train/margins_1": 3.115891218185425, "rewards_train/margins_2": 2.6702927350997925, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -158.53387451171875, "logps_train/policy_1_l": -225.39071655273438, "logps_train/policy_1_w": -147.90310668945312, "logps_train/policy_2_2": -123.87190246582031, "logps_train/policy_2_w": -194.02394104003906, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.3278634548187256, "rewards_train/1-l": -3.257821559906006, "rewards_train/1-w": 3.4034390449523926, "rewards_train/2-2": 3.3846843242645264, "rewards_train/2-w": 1.5476055145263672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.661260604858398, "rewards_train/margins_1": 1.075575590133667, "rewards_train/margins_2": 1.8370788097381592, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -172.7363739013672, "logps_train/policy_1_l": -178.65853881835938, "logps_train/policy_1_w": -135.03526306152344, "logps_train/policy_2_2": -147.855712890625, "logps_train/policy_2_w": -165.91543579101562, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.6216750144958496, "rewards_train/1-l": -2.26399827003479, "rewards_train/1-w": 3.212099552154541, "rewards_train/2-2": 3.6863036155700684, "rewards_train/2-w": 1.8147071599960327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.476097822189331, "rewards_train/margins_1": 0.5904245376586914, "rewards_train/margins_2": 1.8715964555740356, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -84.69097900390625, "logps_train/policy_1_l": -94.4417495727539, "logps_train/policy_1_w": -38.975894927978516, "logps_train/policy_2_2": -70.6517562866211, "logps_train/policy_2_w": -52.533966064453125, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -60.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -68.0, "rewards_train/1-2": 1.3246521949768066, "rewards_train/1-l": -1.8719091415405273, "rewards_train/1-w": 2.16178560256958, "rewards_train/2-2": 2.159043073654175, "rewards_train/2-w": 1.5575411319732666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.033694744110107, "rewards_train/margins_1": 0.8371334075927734, "rewards_train/margins_2": 0.6015019416809082, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -217.11416625976562, "logps_train/policy_1_l": -231.02529907226562, "logps_train/policy_1_w": -117.15187072753906, "logps_train/policy_2_2": -171.52664184570312, "logps_train/policy_2_w": -160.6407012939453, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.018270969390869, "rewards_train/1-l": -2.8923749923706055, "rewards_train/1-w": 3.311765670776367, "rewards_train/2-2": 3.463742256164551, "rewards_train/2-w": 1.7853437662124634, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.204140663146973, "rewards_train/margins_1": 1.293494701385498, "rewards_train/margins_2": 1.6783984899520874, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -140.507080078125, "logps_train/policy_1_l": -175.10385131835938, "logps_train/policy_1_w": -92.40455627441406, "logps_train/policy_2_2": -109.78138732910156, "logps_train/policy_2_w": -113.69625854492188, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.3281986713409424, "rewards_train/1-l": -2.8665390014648438, "rewards_train/1-w": 2.8562240600585938, "rewards_train/2-2": 2.3089704513549805, "rewards_train/2-w": 1.808889627456665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.7227630615234375, "rewards_train/margins_1": 1.5280253887176514, "rewards_train/margins_2": 0.5000808238983154, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -165.01654052734375, "logps_train/policy_1_l": -165.41311645507812, "logps_train/policy_1_w": -151.80564880371094, "logps_train/policy_2_2": -126.90602111816406, "logps_train/policy_2_w": -192.91363525390625, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.151470184326172, "rewards_train/1-l": -1.7012975215911865, "rewards_train/1-w": 3.8225598335266113, "rewards_train/2-2": 3.5312728881835938, "rewards_train/2-w": 1.8383231163024902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.523857355117798, "rewards_train/margins_1": 1.6710896492004395, "rewards_train/margins_2": 1.6929497718811035, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -176.16928100585938, "logps_train/policy_1_l": -148.0988311767578, "logps_train/policy_1_w": -113.63226318359375, "logps_train/policy_2_2": -149.13014221191406, "logps_train/policy_2_w": -139.61244201660156, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.9174473285675049, "rewards_train/1-l": -1.988984227180481, "rewards_train/1-w": 2.7906808853149414, "rewards_train/2-2": 3.1416733264923096, "rewards_train/2-w": 1.6984236240386963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.779665112495422, "rewards_train/margins_1": 0.8732335567474365, "rewards_train/margins_2": 1.4432497024536133, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -145.8664093017578, "logps_train/policy_1_l": -282.1063232421875, "logps_train/policy_1_w": -104.74745178222656, "logps_train/policy_2_2": -115.61705017089844, "logps_train/policy_2_w": -139.40585327148438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.7063274383544922, "rewards_train/1-l": -3.9481351375579834, "rewards_train/1-w": 2.550645589828491, "rewards_train/2-2": 2.905482292175293, "rewards_train/2-w": 1.5875401496887207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.498780727386475, "rewards_train/margins_1": 0.844318151473999, "rewards_train/margins_2": 1.3179421424865723, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -163.48887634277344, "logps_train/policy_1_l": -211.90298461914062, "logps_train/policy_1_w": -155.09698486328125, "logps_train/policy_2_2": -122.84271240234375, "logps_train/policy_2_w": -196.2361297607422, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.582362413406372, "rewards_train/1-l": -2.1758460998535156, "rewards_train/1-w": 2.7457704544067383, "rewards_train/2-2": 2.959479331970215, "rewards_train/2-w": 0.9156444072723389, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.921616554260254, "rewards_train/margins_1": 1.1634080410003662, "rewards_train/margins_2": 2.043834924697876, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -106.27459716796875, "logps_train/policy_1_l": -137.85430908203125, "logps_train/policy_1_w": -84.93114471435547, "logps_train/policy_2_2": -78.0448989868164, "logps_train/policy_2_w": -115.96656799316406, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.0592589378356934, "rewards_train/1-l": -2.71199369430542, "rewards_train/1-w": 2.6994636058807373, "rewards_train/2-2": 2.283010482788086, "rewards_train/2-w": 1.2697497606277466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.411457300186157, "rewards_train/margins_1": 1.640204668045044, "rewards_train/margins_2": 1.0132607221603394, "step": 451 }, { "epoch": 1.35, "learning_rate": 1.31470699109653e-06, "loss": 0.3862, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -58.827354431152344, "logps_train/policy_1_l": -106.91581726074219, "logps_train/policy_1_w": -55.580787658691406, "logps_train/policy_2_2": -38.69233322143555, "logps_train/policy_2_w": -93.67864227294922, "logps_train/ref_1_2": -68.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -53.75, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 0.9047645330429077, "rewards_train/1-l": -2.563236713409424, "rewards_train/1-w": 2.176516056060791, "rewards_train/2-2": 1.4937551021575928, "rewards_train/2-w": -0.023333147168159485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.739752769470215, "rewards_train/margins_1": 1.2717515230178833, "rewards_train/margins_2": 1.5170882493257523, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -228.5302734375, "logps_train/policy_1_l": -240.19717407226562, "logps_train/policy_1_w": -242.79806518554688, "logps_train/policy_2_2": -174.59857177734375, "logps_train/policy_2_w": -326.1317138671875, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -332.0, "rewards_train/1-2": 1.7500971555709839, "rewards_train/1-l": -2.580362558364868, "rewards_train/1-w": 3.909548759460449, "rewards_train/2-2": 3.822173595428467, "rewards_train/2-w": 0.5887823700904846, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.489911317825317, "rewards_train/margins_1": 2.1594516038894653, "rewards_train/margins_2": 3.233391225337982, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -214.7376708984375, "logps_train/policy_1_l": -197.65719604492188, "logps_train/policy_1_w": -148.231689453125, "logps_train/policy_2_2": -163.5314178466797, "logps_train/policy_2_w": -192.227783203125, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.5842405557632446, "rewards_train/1-l": -2.2952122688293457, "rewards_train/1-w": 3.0408949851989746, "rewards_train/2-2": 3.7667369842529297, "rewards_train/2-w": 1.2256594896316528, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.33610725402832, "rewards_train/margins_1": 1.45665442943573, "rewards_train/margins_2": 2.541077494621277, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -151.38372802734375, "logps_train/policy_1_l": -122.0101318359375, "logps_train/policy_1_w": -74.94435119628906, "logps_train/policy_2_2": -108.81309509277344, "logps_train/policy_2_w": -104.05601501464844, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -0.018451452255249023, "rewards_train/1-l": -1.9871464967727661, "rewards_train/1-w": 1.8219707012176514, "rewards_train/2-2": 2.1132218837738037, "rewards_train/2-w": 1.1279926300048828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8091171979904175, "rewards_train/margins_1": 1.8404221534729004, "rewards_train/margins_2": 0.9852292537689209, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -140.6705322265625, "logps_train/policy_1_l": -157.3812255859375, "logps_train/policy_1_w": -92.42224884033203, "logps_train/policy_2_2": -114.05484008789062, "logps_train/policy_2_w": -122.0482177734375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.847790002822876, "rewards_train/1-l": -3.027966022491455, "rewards_train/1-w": 3.5148065090179443, "rewards_train/2-2": 2.7240071296691895, "rewards_train/2-w": 2.337757110595703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.542772531509399, "rewards_train/margins_1": 1.6670165061950684, "rewards_train/margins_2": 0.38625001907348633, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -115.85783386230469, "logps_train/policy_1_l": -216.35997009277344, "logps_train/policy_1_w": -89.84840393066406, "logps_train/policy_2_2": -89.1216049194336, "logps_train/policy_2_w": -108.49363708496094, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.3224198818206787, "rewards_train/1-l": -3.8741812705993652, "rewards_train/1-w": 2.1228744983673096, "rewards_train/2-2": 2.390183448791504, "rewards_train/2-w": 1.5967304706573486, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.997055768966675, "rewards_train/margins_1": 0.8004546165466309, "rewards_train/margins_2": 0.7934529781341553, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -117.2830581665039, "logps_train/policy_1_l": -265.71142578125, "logps_train/policy_1_w": -169.46340942382812, "logps_train/policy_2_2": -93.97916412353516, "logps_train/policy_2_w": -224.60055541992188, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.9216934442520142, "rewards_train/1-l": -2.8617687225341797, "rewards_train/1-w": 4.106783866882324, "rewards_train/2-2": 2.6122398376464844, "rewards_train/2-w": 2.071194648742676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.968552589416504, "rewards_train/margins_1": 2.18509042263031, "rewards_train/margins_2": 0.5410451889038086, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -158.57058715820312, "logps_train/policy_1_l": -163.41775512695312, "logps_train/policy_1_w": -93.06037139892578, "logps_train/policy_2_2": -106.91450500488281, "logps_train/policy_2_w": -134.6045379638672, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.8952858448028564, "rewards_train/1-l": -2.983572006225586, "rewards_train/1-w": 2.980681896209717, "rewards_train/2-2": 2.961674690246582, "rewards_train/2-w": 1.611421823501587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.964253902435303, "rewards_train/margins_1": 2.0853960514068604, "rewards_train/margins_2": 1.3502528667449951, "step": 452 }, { "epoch": 1.36, "logps_train/policy_1_2": -210.53778076171875, "logps_train/policy_1_l": -219.595703125, "logps_train/policy_1_w": -137.20578002929688, "logps_train/policy_2_2": -151.42431640625, "logps_train/policy_2_w": -191.49514770507812, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.9899722337722778, "rewards_train/1-l": -2.159666061401367, "rewards_train/1-w": 2.5332298278808594, "rewards_train/2-2": 3.527881622314453, "rewards_train/2-w": 0.604000449180603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.692895889282227, "rewards_train/margins_1": 1.5432575941085815, "rewards_train/margins_2": 2.92388117313385, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -155.2066650390625, "logps_train/policy_1_l": -151.09095764160156, "logps_train/policy_1_w": -119.54428100585938, "logps_train/policy_2_2": -124.41709899902344, "logps_train/policy_2_w": -152.32342529296875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.6371465921401978, "rewards_train/1-l": -1.2401511669158936, "rewards_train/1-w": 2.3561182022094727, "rewards_train/2-2": 2.0756726264953613, "rewards_train/2-w": 1.2426574230194092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.596269369125366, "rewards_train/margins_1": 1.718971610069275, "rewards_train/margins_2": 0.8330152034759521, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -176.73390197753906, "logps_train/policy_1_l": -107.95309448242188, "logps_train/policy_1_w": -56.03403854370117, "logps_train/policy_2_2": -129.49063110351562, "logps_train/policy_2_w": -86.87998962402344, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.251609206199646, "rewards_train/1-l": -1.8007785081863403, "rewards_train/1-w": 1.8262839317321777, "rewards_train/2-2": 2.4587490558624268, "rewards_train/2-w": 0.8026259541511536, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.627062439918518, "rewards_train/margins_1": 1.5746747255325317, "rewards_train/margins_2": 1.6561231017112732, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -235.49465942382812, "logps_train/policy_1_l": -162.8001708984375, "logps_train/policy_1_w": -109.20675659179688, "logps_train/policy_2_2": -189.0052490234375, "logps_train/policy_2_w": -145.46287536621094, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.9880329370498657, "rewards_train/1-l": -2.119080066680908, "rewards_train/1-w": 2.5918240547180176, "rewards_train/2-2": 3.1244752407073975, "rewards_train/2-w": 1.2787120342254639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.710904121398926, "rewards_train/margins_1": 1.6037911176681519, "rewards_train/margins_2": 1.8457632064819336, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -150.84310913085938, "logps_train/policy_1_l": -159.57174682617188, "logps_train/policy_1_w": -85.51815795898438, "logps_train/policy_2_2": -123.78938293457031, "logps_train/policy_2_w": -121.60995483398438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.1750653982162476, "rewards_train/1-l": -1.990767240524292, "rewards_train/1-w": 2.5481839179992676, "rewards_train/2-2": 2.2148115634918213, "rewards_train/2-w": 1.1530663967132568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.53895115852356, "rewards_train/margins_1": 1.37311851978302, "rewards_train/margins_2": 1.0617451667785645, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -218.89991760253906, "logps_train/policy_1_l": -208.59620666503906, "logps_train/policy_1_w": -167.83364868164062, "logps_train/policy_2_2": -172.73971557617188, "logps_train/policy_2_w": -210.10006713867188, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.5967271327972412, "rewards_train/1-l": -1.9643076658248901, "rewards_train/1-w": 2.8119473457336426, "rewards_train/2-2": 3.5885274410247803, "rewards_train/2-w": 1.5571818351745605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.776255011558533, "rewards_train/margins_1": 1.2152202129364014, "rewards_train/margins_2": 2.0313456058502197, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -123.84746551513672, "logps_train/policy_1_l": -115.35356903076172, "logps_train/policy_1_w": -61.49293518066406, "logps_train/policy_2_2": -92.72360229492188, "logps_train/policy_2_w": -92.17477416992188, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.7515814900398254, "rewards_train/1-l": -1.7675836086273193, "rewards_train/1-w": 1.659690499305725, "rewards_train/2-2": 1.92588210105896, "rewards_train/2-w": 0.81455397605896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4272741079330444, "rewards_train/margins_1": 0.9081090092658997, "rewards_train/margins_2": 1.111328125, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -104.02134704589844, "logps_train/policy_1_l": -114.37167358398438, "logps_train/policy_1_w": -78.1809310913086, "logps_train/policy_2_2": -82.47651672363281, "logps_train/policy_2_w": -102.3758773803711, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.465052843093872, "rewards_train/1-l": -1.7387301921844482, "rewards_train/1-w": 2.2405006885528564, "rewards_train/2-2": 2.4625039100646973, "rewards_train/2-w": 1.0717873573303223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9792308807373047, "rewards_train/margins_1": 0.7754478454589844, "rewards_train/margins_2": 1.390716552734375, "step": 453 }, { "epoch": 1.36, "learning_rate": 1.2930194931731382e-06, "loss": 0.4897, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -226.24932861328125, "logps_train/policy_1_l": -155.83877563476562, "logps_train/policy_1_w": -135.569091796875, "logps_train/policy_2_2": -181.00961303710938, "logps_train/policy_2_w": -166.9437713623047, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.1875666379928589, "rewards_train/1-l": -2.344229221343994, "rewards_train/1-w": 3.1692631244659424, "rewards_train/2-2": 3.7115378379821777, "rewards_train/2-w": 2.0634350776672363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.5134923458099365, "rewards_train/margins_1": 1.9816964864730835, "rewards_train/margins_2": 1.6481027603149414, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -167.76023864746094, "logps_train/policy_1_l": -178.3782958984375, "logps_train/policy_1_w": -135.23291015625, "logps_train/policy_2_2": -121.6613540649414, "logps_train/policy_2_w": -194.6507568359375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.4739763736724854, "rewards_train/1-l": -1.8479865789413452, "rewards_train/1-w": 3.375145196914673, "rewards_train/2-2": 2.90222430229187, "rewards_train/2-w": 1.4739882946014404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.223131775856018, "rewards_train/margins_1": 1.9011688232421875, "rewards_train/margins_2": 1.4282360076904297, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -171.00872802734375, "logps_train/policy_1_l": -215.43545532226562, "logps_train/policy_1_w": -132.10194396972656, "logps_train/policy_2_2": -143.25906372070312, "logps_train/policy_2_w": -164.722900390625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.113970994949341, "rewards_train/1-l": -1.824796199798584, "rewards_train/1-w": 3.195274591445923, "rewards_train/2-2": 3.04460072517395, "rewards_train/2-w": 1.916967511177063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.020070791244507, "rewards_train/margins_1": 1.081303596496582, "rewards_train/margins_2": 1.1276332139968872, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -186.4683380126953, "logps_train/policy_1_l": -202.52096557617188, "logps_train/policy_1_w": -93.93646240234375, "logps_train/policy_2_2": -154.86386108398438, "logps_train/policy_2_w": -129.22457885742188, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.4219166040420532, "rewards_train/1-l": -3.287252426147461, "rewards_train/1-w": 3.0782291889190674, "rewards_train/2-2": 2.882363796234131, "rewards_train/2-w": 1.8369171619415283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.365481615066528, "rewards_train/margins_1": 1.6563125848770142, "rewards_train/margins_2": 1.0454466342926025, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -158.11581420898438, "logps_train/policy_1_l": -143.50119018554688, "logps_train/policy_1_w": -83.04143524169922, "logps_train/policy_2_2": -122.61849975585938, "logps_train/policy_2_w": -120.45002746582031, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.3681058883666992, "rewards_train/1-l": -2.717209815979004, "rewards_train/1-w": 3.245857000350952, "rewards_train/2-2": 3.013150215148926, "rewards_train/2-w": 1.724528193473816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.963066816329956, "rewards_train/margins_1": 1.877751111984253, "rewards_train/margins_2": 1.2886220216751099, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -193.60867309570312, "logps_train/policy_1_l": -220.03829956054688, "logps_train/policy_1_w": -181.0049285888672, "logps_train/policy_2_2": -155.92527770996094, "logps_train/policy_2_w": -239.05690002441406, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.195383310317993, "rewards_train/1-l": -2.971017837524414, "rewards_train/1-w": 4.034664154052734, "rewards_train/2-2": 3.460597038269043, "rewards_train/2-w": 1.5474351644515991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.005681991577148, "rewards_train/margins_1": 1.8392808437347412, "rewards_train/margins_2": 1.9131618738174438, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -153.35751342773438, "logps_train/policy_1_l": -143.5297088623047, "logps_train/policy_1_w": -102.05320739746094, "logps_train/policy_2_2": -110.10992431640625, "logps_train/policy_2_w": -137.1638641357422, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 0.907997727394104, "rewards_train/1-l": -1.8389084339141846, "rewards_train/1-w": 3.077491521835327, "rewards_train/2-2": 2.699944496154785, "rewards_train/2-w": 1.3476755619049072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.916399955749512, "rewards_train/margins_1": 2.169493794441223, "rewards_train/margins_2": 1.352268934249878, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -176.04776000976562, "logps_train/policy_1_l": -153.85423278808594, "logps_train/policy_1_w": -103.74758911132812, "logps_train/policy_2_2": -131.50404357910156, "logps_train/policy_2_w": -149.44952392578125, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.8436614274978638, "rewards_train/1-l": -1.744798183441162, "rewards_train/1-w": 2.9326627254486084, "rewards_train/2-2": 3.1949081420898438, "rewards_train/2-w": 1.3820013999938965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6774609088897705, "rewards_train/margins_1": 1.0890012979507446, "rewards_train/margins_2": 1.8129067420959473, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -95.38450622558594, "logps_train/policy_1_l": -142.60691833496094, "logps_train/policy_1_w": -84.9494857788086, "logps_train/policy_2_2": -72.9580307006836, "logps_train/policy_2_w": -118.69800567626953, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.0183850526809692, "rewards_train/1-l": -2.6675281524658203, "rewards_train/1-w": 2.5027081966400146, "rewards_train/2-2": 1.9297339916229248, "rewards_train/2-w": 0.9098861813545227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.170236349105835, "rewards_train/margins_1": 1.4843231439590454, "rewards_train/margins_2": 1.019847810268402, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -287.133544921875, "logps_train/policy_1_l": -257.6256103515625, "logps_train/policy_1_w": -178.79469299316406, "logps_train/policy_2_2": -204.11270141601562, "logps_train/policy_2_w": -258.0042724609375, "logps_train/ref_1_2": -296.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 0.9382080435752869, "rewards_train/1-l": -2.355923652648926, "rewards_train/1-w": 4.275217533111572, "rewards_train/2-2": 4.161385536193848, "rewards_train/2-w": 1.5230079889297485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.631141185760498, "rewards_train/margins_1": 3.3370094895362854, "rewards_train/margins_2": 2.638377547264099, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -124.9782943725586, "logps_train/policy_1_l": -82.76750946044922, "logps_train/policy_1_w": -117.6297836303711, "logps_train/policy_2_2": -96.84678649902344, "logps_train/policy_2_w": -144.42262268066406, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3666231632232666, "rewards_train/1-l": -0.6624929904937744, "rewards_train/1-w": 2.659677743911743, "rewards_train/2-2": 2.8903207778930664, "rewards_train/2-w": 1.5971912145614624, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3221707344055176, "rewards_train/margins_1": 1.2930545806884766, "rewards_train/margins_2": 1.293129563331604, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -118.40209197998047, "logps_train/policy_1_l": -156.7913818359375, "logps_train/policy_1_w": -98.63868713378906, "logps_train/policy_2_2": -96.23745727539062, "logps_train/policy_2_w": -123.3375473022461, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.4097907543182373, "rewards_train/1-l": -2.315856695175171, "rewards_train/1-w": 2.379490852355957, "rewards_train/2-2": 2.172738552093506, "rewards_train/2-w": 1.2502294778823853, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.695347547531128, "rewards_train/margins_1": 0.9697000980377197, "rewards_train/margins_2": 0.9225090742111206, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -197.12840270996094, "logps_train/policy_1_l": -173.42628479003906, "logps_train/policy_1_w": -150.8893280029297, "logps_train/policy_2_2": -162.87527465820312, "logps_train/policy_2_w": -213.32164001464844, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.9227067232131958, "rewards_train/1-l": -2.5457534790039062, "rewards_train/1-w": 4.314582824707031, "rewards_train/2-2": 3.3527066707611084, "rewards_train/2-w": 1.9772106409072876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.8603363037109375, "rewards_train/margins_1": 2.3918761014938354, "rewards_train/margins_2": 1.3754960298538208, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -164.89437866210938, "logps_train/policy_1_l": -152.0858612060547, "logps_train/policy_1_w": -109.90324401855469, "logps_train/policy_2_2": -129.68214416503906, "logps_train/policy_2_w": -151.82122802734375, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.5629057884216309, "rewards_train/1-l": -1.5096118450164795, "rewards_train/1-w": 2.8190510272979736, "rewards_train/2-2": 2.8025619983673096, "rewards_train/2-w": 1.2756907939910889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.328662872314453, "rewards_train/margins_1": 1.2561452388763428, "rewards_train/margins_2": 1.5268712043762207, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -122.74508666992188, "logps_train/policy_1_l": -172.77197265625, "logps_train/policy_1_w": -137.63804626464844, "logps_train/policy_2_2": -105.888916015625, "logps_train/policy_2_w": -166.0642852783203, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.2368199825286865, "rewards_train/1-l": -1.9119614362716675, "rewards_train/1-w": 2.9100241661071777, "rewards_train/2-2": 2.6884520053863525, "rewards_train/2-w": 1.706852912902832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.821985602378845, "rewards_train/margins_1": 0.6732041835784912, "rewards_train/margins_2": 0.9815990924835205, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -98.78179931640625, "logps_train/policy_1_l": -81.32205963134766, "logps_train/policy_1_w": -92.40034484863281, "logps_train/policy_2_2": -72.37519073486328, "logps_train/policy_2_w": -124.42909240722656, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.2171329259872437, "rewards_train/1-l": -1.1509562730789185, "rewards_train/1-w": 2.5787148475646973, "rewards_train/2-2": 2.2066214084625244, "rewards_train/2-w": 0.8414655923843384, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7296711206436157, "rewards_train/margins_1": 1.3615819215774536, "rewards_train/margins_2": 1.365155816078186, "step": 455 }, { "epoch": 1.37, "learning_rate": 1.2714497944194376e-06, "loss": 0.3954, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -235.90025329589844, "logps_train/policy_1_l": -147.24559020996094, "logps_train/policy_1_w": -90.08052062988281, "logps_train/policy_2_2": -179.7310791015625, "logps_train/policy_2_w": -122.44926452636719, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.745130181312561, "rewards_train/1-l": -2.1099109649658203, "rewards_train/1-w": 3.1653857231140137, "rewards_train/2-2": 3.2809946537017822, "rewards_train/2-w": 2.014448642730713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.275296688079834, "rewards_train/margins_1": 2.4202555418014526, "rewards_train/margins_2": 1.2665460109710693, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -90.3791275024414, "logps_train/policy_1_l": -117.80802917480469, "logps_train/policy_1_w": -58.6412353515625, "logps_train/policy_2_2": -66.74629211425781, "logps_train/policy_2_w": -79.85435485839844, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 1.426149606704712, "rewards_train/1-l": -1.9794846773147583, "rewards_train/1-w": 1.8862671852111816, "rewards_train/2-2": 2.3995895385742188, "rewards_train/2-w": 1.017884612083435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.86575186252594, "rewards_train/margins_1": 0.4601175785064697, "rewards_train/margins_2": 1.3817049264907837, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -156.75518798828125, "logps_train/policy_1_l": -226.71995544433594, "logps_train/policy_1_w": -215.5218505859375, "logps_train/policy_2_2": -125.51972198486328, "logps_train/policy_2_w": -274.32427978515625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.5221362113952637, "rewards_train/1-l": -2.5108633041381836, "rewards_train/1-w": 3.996253252029419, "rewards_train/2-2": 2.8884572982788086, "rewards_train/2-w": 1.4847590923309326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.5071165561676025, "rewards_train/margins_1": 2.4741170406341553, "rewards_train/margins_2": 1.403698205947876, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -139.95982360839844, "logps_train/policy_1_l": -166.31930541992188, "logps_train/policy_1_w": -83.64707946777344, "logps_train/policy_2_2": -105.23838806152344, "logps_train/policy_2_w": -115.42640686035156, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.9079242944717407, "rewards_train/1-l": -2.329587459564209, "rewards_train/1-w": 2.5173237323760986, "rewards_train/2-2": 2.5370986461639404, "rewards_train/2-w": 1.3612661361694336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.846911191940308, "rewards_train/margins_1": 1.609399437904358, "rewards_train/margins_2": 1.1758325099945068, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -155.68377685546875, "logps_train/policy_1_l": -121.39305114746094, "logps_train/policy_1_w": -148.24209594726562, "logps_train/policy_2_2": -130.38882446289062, "logps_train/policy_2_w": -182.49578857421875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.7097464799880981, "rewards_train/1-l": -0.8125475645065308, "rewards_train/1-w": 3.198446035385132, "rewards_train/2-2": 2.918931007385254, "rewards_train/2-w": 1.9246394634246826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.010993599891663, "rewards_train/margins_1": 1.4886995553970337, "rewards_train/margins_2": 0.9942915439605713, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -117.63355255126953, "logps_train/policy_1_l": -169.69784545898438, "logps_train/policy_1_w": -112.64081573486328, "logps_train/policy_2_2": -91.65400695800781, "logps_train/policy_2_w": -142.42640686035156, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.1007070541381836, "rewards_train/1-l": -2.9033782482147217, "rewards_train/1-w": 2.8718557357788086, "rewards_train/2-2": 3.042412281036377, "rewards_train/2-w": 1.7229844331741333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.77523398399353, "rewards_train/margins_1": 0.771148681640625, "rewards_train/margins_2": 1.3194278478622437, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -134.61329650878906, "logps_train/policy_1_l": -123.78359985351562, "logps_train/policy_1_w": -88.58514404296875, "logps_train/policy_2_2": -98.96417999267578, "logps_train/policy_2_w": -114.15750122070312, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.0136702060699463, "rewards_train/1-l": -1.3611236810684204, "rewards_train/1-w": 2.75242280960083, "rewards_train/2-2": 2.2199883460998535, "rewards_train/2-w": 1.4842498302459717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.1135464906692505, "rewards_train/margins_1": 1.7387526035308838, "rewards_train/margins_2": 0.7357385158538818, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -133.49945068359375, "logps_train/policy_1_l": -118.86901092529297, "logps_train/policy_1_w": -148.1724090576172, "logps_train/policy_2_2": -105.15812683105469, "logps_train/policy_2_w": -180.84503173828125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.4375542402267456, "rewards_train/1-l": -1.2808462381362915, "rewards_train/1-w": 3.3280715942382812, "rewards_train/2-2": 2.7630934715270996, "rewards_train/2-w": 1.7279982566833496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.608917832374573, "rewards_train/margins_1": 1.8905173540115356, "rewards_train/margins_2": 1.03509521484375, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -170.44288635253906, "logps_train/policy_1_l": -200.02099609375, "logps_train/policy_1_w": -142.35862731933594, "logps_train/policy_2_2": -126.50350952148438, "logps_train/policy_2_w": -200.73062133789062, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.572507619857788, "rewards_train/1-l": -1.8882317543029785, "rewards_train/1-w": 3.022730827331543, "rewards_train/2-2": 3.050626277923584, "rewards_train/2-w": 1.648421287536621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.9109625816345215, "rewards_train/margins_1": 1.4502232074737549, "rewards_train/margins_2": 1.402204990386963, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -146.17816162109375, "logps_train/policy_1_l": -184.25265502929688, "logps_train/policy_1_w": -155.63427734375, "logps_train/policy_2_2": -111.59253692626953, "logps_train/policy_2_w": -195.3660888671875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.3946834802627563, "rewards_train/1-l": -1.8086636066436768, "rewards_train/1-w": 3.4795401096343994, "rewards_train/2-2": 2.7368404865264893, "rewards_train/2-w": 1.6212046146392822, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.288203716278076, "rewards_train/margins_1": 2.084856629371643, "rewards_train/margins_2": 1.115635871887207, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -134.9036865234375, "logps_train/policy_1_l": -181.64614868164062, "logps_train/policy_1_w": -92.40264892578125, "logps_train/policy_2_2": -95.51589965820312, "logps_train/policy_2_w": -134.55255126953125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.1283820867538452, "rewards_train/1-l": -2.172623634338379, "rewards_train/1-w": 2.7866883277893066, "rewards_train/2-2": 2.620285987854004, "rewards_train/2-w": 1.299919843673706, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.9593119621276855, "rewards_train/margins_1": 1.6583062410354614, "rewards_train/margins_2": 1.3203661441802979, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -149.87078857421875, "logps_train/policy_1_l": -91.01051330566406, "logps_train/policy_1_w": -86.95913696289062, "logps_train/policy_2_2": -119.81349182128906, "logps_train/policy_2_w": -136.34124755859375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.9029588103294373, "rewards_train/1-l": -1.0565202236175537, "rewards_train/1-w": 3.9048681259155273, "rewards_train/2-2": 2.336228847503662, "rewards_train/2-w": 1.6838449239730835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.961388349533081, "rewards_train/margins_1": 3.00190931558609, "rewards_train/margins_2": 0.6523839235305786, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -128.83555603027344, "logps_train/policy_1_l": -177.84010314941406, "logps_train/policy_1_w": -86.8581771850586, "logps_train/policy_2_2": -94.57125091552734, "logps_train/policy_2_w": -120.19517517089844, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.327772855758667, "rewards_train/1-l": -2.7894792556762695, "rewards_train/1-w": 2.1626200675964355, "rewards_train/2-2": 2.564359664916992, "rewards_train/2-w": 0.9218884706497192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.952099323272705, "rewards_train/margins_1": 0.8348472118377686, "rewards_train/margins_2": 1.642471194267273, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -90.03970336914062, "logps_train/policy_1_l": -90.72592163085938, "logps_train/policy_1_w": -144.70480346679688, "logps_train/policy_2_2": -62.339385986328125, "logps_train/policy_2_w": -190.8265380859375, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.4960298538208008, "rewards_train/1-l": -1.1452486515045166, "rewards_train/1-w": 3.2576441764831543, "rewards_train/2-2": 2.3347134590148926, "rewards_train/2-w": 0.7079716920852661, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.402892827987671, "rewards_train/margins_1": 1.7616143226623535, "rewards_train/margins_2": 1.6267417669296265, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -168.42898559570312, "logps_train/policy_1_l": -223.447265625, "logps_train/policy_1_w": -127.47260284423828, "logps_train/policy_2_2": -141.5306396484375, "logps_train/policy_2_w": -167.2869873046875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.2086637020111084, "rewards_train/1-l": -3.526756525039673, "rewards_train/1-w": 3.805083751678467, "rewards_train/2-2": 3.2484993934631348, "rewards_train/2-w": 1.8931759595870972, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.33184027671814, "rewards_train/margins_1": 1.5964200496673584, "rewards_train/margins_2": 1.3553234338760376, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -178.7366943359375, "logps_train/policy_1_l": -210.73101806640625, "logps_train/policy_1_w": -193.93203735351562, "logps_train/policy_2_2": -144.4752197265625, "logps_train/policy_2_w": -243.32444763183594, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 2.1165659427642822, "rewards_train/1-l": -2.3376526832580566, "rewards_train/1-w": 2.518514394760132, "rewards_train/2-2": 3.473083734512329, "rewards_train/2-w": 0.18317976593971252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.8561670780181885, "rewards_train/margins_1": 0.4019484519958496, "rewards_train/margins_2": 3.2899039685726166, "step": 457 }, { "epoch": 1.37, "learning_rate": 1.2500000000000007e-06, "loss": 0.5176, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -160.41867065429688, "logps_train/policy_1_l": -206.66571044921875, "logps_train/policy_1_w": -167.2435760498047, "logps_train/policy_2_2": -111.90243530273438, "logps_train/policy_2_w": -223.85667419433594, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.2971949577331543, "rewards_train/1-l": -2.9813175201416016, "rewards_train/1-w": 2.4358959197998047, "rewards_train/2-2": 2.9417874813079834, "rewards_train/2-w": 0.508277416229248, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.417213439941406, "rewards_train/margins_1": 1.1387009620666504, "rewards_train/margins_2": 2.4335100650787354, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -112.33889770507812, "logps_train/policy_1_l": -87.3105239868164, "logps_train/policy_1_w": -46.24188232421875, "logps_train/policy_2_2": -80.638671875, "logps_train/policy_2_w": -63.69607925415039, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -62.25, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -74.0, "rewards_train/1-2": 1.063376545906067, "rewards_train/1-l": -0.9651341438293457, "rewards_train/1-w": 1.6090149879455566, "rewards_train/2-2": 2.9044928550720215, "rewards_train/2-w": 1.0565638542175293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5741491317749023, "rewards_train/margins_1": 0.5456384420394897, "rewards_train/margins_2": 1.8479290008544922, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -75.4957046508789, "logps_train/policy_1_l": -107.82781219482422, "logps_train/policy_1_w": -73.45486450195312, "logps_train/policy_2_2": -53.64645004272461, "logps_train/policy_2_w": -129.23129272460938, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.3644918203353882, "rewards_train/1-l": -1.5112972259521484, "rewards_train/1-w": 2.326388359069824, "rewards_train/2-2": 1.9478551149368286, "rewards_train/2-w": 0.8135882616043091, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8376855850219727, "rewards_train/margins_1": 0.961896538734436, "rewards_train/margins_2": 1.1342668533325195, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -134.77711486816406, "logps_train/policy_1_l": -190.0873260498047, "logps_train/policy_1_w": -100.77413940429688, "logps_train/policy_2_2": -107.65017700195312, "logps_train/policy_2_w": -131.0209197998047, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.4269767999649048, "rewards_train/1-l": -1.9267016649246216, "rewards_train/1-w": 2.885867118835449, "rewards_train/2-2": 2.6092007160186768, "rewards_train/2-w": 1.703376293182373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.812568783760071, "rewards_train/margins_1": 1.4588903188705444, "rewards_train/margins_2": 0.9058244228363037, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -159.91021728515625, "logps_train/policy_1_l": -216.6361541748047, "logps_train/policy_1_w": -130.50672912597656, "logps_train/policy_2_2": -122.26844787597656, "logps_train/policy_2_w": -171.40504455566406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.7355396747589111, "rewards_train/1-l": -2.365177631378174, "rewards_train/1-w": 3.191514730453491, "rewards_train/2-2": 3.033700942993164, "rewards_train/2-w": 2.220433235168457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.556692361831665, "rewards_train/margins_1": 1.45597505569458, "rewards_train/margins_2": 0.813267707824707, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -206.1297607421875, "logps_train/policy_1_l": -201.2538604736328, "logps_train/policy_1_w": -125.85255432128906, "logps_train/policy_2_2": -169.7891845703125, "logps_train/policy_2_w": -164.460693359375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.5979602336883545, "rewards_train/1-l": -2.4812464714050293, "rewards_train/1-w": 3.1678693294525146, "rewards_train/2-2": 3.128113031387329, "rewards_train/2-w": 2.0101816654205322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.649115800857544, "rewards_train/margins_1": 1.5699090957641602, "rewards_train/margins_2": 1.1179313659667969, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -178.23880004882812, "logps_train/policy_1_l": -109.90493774414062, "logps_train/policy_1_w": -112.2110595703125, "logps_train/policy_2_2": -141.21957397460938, "logps_train/policy_2_w": -143.7086181640625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.3401833772659302, "rewards_train/1-l": -1.02311110496521, "rewards_train/1-w": 3.244518756866455, "rewards_train/2-2": 1.3913233280181885, "rewards_train/2-w": 1.8990604877471924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.267629861831665, "rewards_train/margins_1": 2.904335379600525, "rewards_train/margins_2": -0.5077371597290039, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -232.17593383789062, "logps_train/policy_1_l": -235.34490966796875, "logps_train/policy_1_w": -153.14816284179688, "logps_train/policy_2_2": -173.85430908203125, "logps_train/policy_2_w": -208.93238830566406, "logps_train/ref_1_2": -251.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.8511571884155273, "rewards_train/1-l": -2.3282415866851807, "rewards_train/1-w": 3.935183048248291, "rewards_train/2-2": 3.733318567276001, "rewards_train/2-w": 1.6505109071731567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.263424634933472, "rewards_train/margins_1": 2.0840258598327637, "rewards_train/margins_2": 2.0828076601028442, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -149.55860900878906, "logps_train/policy_1_l": -161.09255981445312, "logps_train/policy_1_w": -74.97837829589844, "logps_train/policy_2_2": -112.52774047851562, "logps_train/policy_2_w": -109.97056579589844, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.5582013130187988, "rewards_train/1-l": -1.6014435291290283, "rewards_train/1-w": 2.6740365028381348, "rewards_train/2-2": 2.8362879753112793, "rewards_train/2-w": 1.438881278038025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.275480031967163, "rewards_train/margins_1": 1.115835189819336, "rewards_train/margins_2": 1.3974066972732544, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -155.73318481445312, "logps_train/policy_1_l": -160.02398681640625, "logps_train/policy_1_w": -110.50164794921875, "logps_train/policy_2_2": -124.60505676269531, "logps_train/policy_2_w": -145.41783142089844, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4298075437545776, "rewards_train/1-l": -1.7191470861434937, "rewards_train/1-w": 3.2779600620269775, "rewards_train/2-2": 2.5957446098327637, "rewards_train/2-w": 1.5222793817520142, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.997107148170471, "rewards_train/margins_1": 1.8481525182724, "rewards_train/margins_2": 1.0734652280807495, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -156.56761169433594, "logps_train/policy_1_l": -178.39703369140625, "logps_train/policy_1_w": -104.47726440429688, "logps_train/policy_2_2": -115.2376708984375, "logps_train/policy_2_w": -148.5823211669922, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.979567289352417, "rewards_train/1-l": -2.20064115524292, "rewards_train/1-w": 3.2042269706726074, "rewards_train/2-2": 2.466857433319092, "rewards_train/2-w": 1.4093458652496338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.404868125915527, "rewards_train/margins_1": 2.2246596813201904, "rewards_train/margins_2": 1.057511568069458, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -141.61460876464844, "logps_train/policy_1_l": -240.67868041992188, "logps_train/policy_1_w": -139.13748168945312, "logps_train/policy_2_2": -113.94462585449219, "logps_train/policy_2_w": -168.1815948486328, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.7713512182235718, "rewards_train/1-l": -2.7010715007781982, "rewards_train/1-w": 3.06203293800354, "rewards_train/2-2": 2.6235055923461914, "rewards_train/2-w": 2.069340705871582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.763104438781738, "rewards_train/margins_1": 1.2906817197799683, "rewards_train/margins_2": 0.5541648864746094, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -189.96316528320312, "logps_train/policy_1_l": -193.4417724609375, "logps_train/policy_1_w": -111.44867706298828, "logps_train/policy_2_2": -150.40966796875, "logps_train/policy_2_w": -160.5488739013672, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.6755585670471191, "rewards_train/1-l": -2.6849985122680664, "rewards_train/1-w": 3.203667402267456, "rewards_train/2-2": 3.2980966567993164, "rewards_train/2-w": 1.4669880867004395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.8886659145355225, "rewards_train/margins_1": 1.528108835220337, "rewards_train/margins_2": 1.831108570098877, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -197.48013305664062, "logps_train/policy_1_l": -218.50704956054688, "logps_train/policy_1_w": -125.28482818603516, "logps_train/policy_2_2": -146.4678955078125, "logps_train/policy_2_w": -177.705810546875, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.4707375764846802, "rewards_train/1-l": -2.840254783630371, "rewards_train/1-w": 2.708822250366211, "rewards_train/2-2": 3.6735243797302246, "rewards_train/2-w": 0.6794173121452332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.549077033996582, "rewards_train/margins_1": 1.2380846738815308, "rewards_train/margins_2": 2.9941070675849915, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -97.54031372070312, "logps_train/policy_1_l": -110.30706787109375, "logps_train/policy_1_w": -78.15971374511719, "logps_train/policy_2_2": -72.84109497070312, "logps_train/policy_2_w": -109.20317077636719, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.5287814140319824, "rewards_train/1-l": -1.7568778991699219, "rewards_train/1-w": 2.474458694458008, "rewards_train/2-2": 1.5850311517715454, "rewards_train/2-w": 1.0906199216842651, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.23133659362793, "rewards_train/margins_1": 1.9456772804260254, "rewards_train/margins_2": 0.4944112300872803, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -132.458251953125, "logps_train/policy_1_l": -96.80628967285156, "logps_train/policy_1_w": -70.07070922851562, "logps_train/policy_2_2": -89.65006256103516, "logps_train/policy_2_w": -105.25346374511719, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.1697993278503418, "rewards_train/1-l": -1.4634418487548828, "rewards_train/1-w": 2.324960231781006, "rewards_train/2-2": 2.9646811485290527, "rewards_train/2-w": 1.1090288162231445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7884020805358887, "rewards_train/margins_1": 1.155160903930664, "rewards_train/margins_2": 1.8556523323059082, "step": 459 }, { "epoch": 1.38, "learning_rate": 1.2286722033769494e-06, "loss": 0.5002, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -216.46142578125, "logps_train/policy_1_l": -279.83807373046875, "logps_train/policy_1_w": -167.92481994628906, "logps_train/policy_2_2": -176.5467987060547, "logps_train/policy_2_w": -208.51333618164062, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.3999509811401367, "rewards_train/1-l": -3.9338059425354004, "rewards_train/1-w": 4.2512688636779785, "rewards_train/2-2": 3.646883249282837, "rewards_train/2-w": 2.5174174308776855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.185074806213379, "rewards_train/margins_1": 1.8513178825378418, "rewards_train/margins_2": 1.1294658184051514, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -186.60958862304688, "logps_train/policy_1_l": -206.98988342285156, "logps_train/policy_1_w": -105.8216552734375, "logps_train/policy_2_2": -151.64096069335938, "logps_train/policy_2_w": -135.23382568359375, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.8265409469604492, "rewards_train/1-l": -2.713637113571167, "rewards_train/1-w": 2.3943979740142822, "rewards_train/2-2": 3.342153310775757, "rewards_train/2-w": 1.2293510437011719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.108035087585449, "rewards_train/margins_1": 0.567857027053833, "rewards_train/margins_2": 2.112802267074585, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -139.92666625976562, "logps_train/policy_1_l": -156.75213623046875, "logps_train/policy_1_w": -130.5084991455078, "logps_train/policy_2_2": -106.60752868652344, "logps_train/policy_2_w": -176.8893585205078, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.3206145763397217, "rewards_train/1-l": -2.2506051063537598, "rewards_train/1-w": 3.9350883960723877, "rewards_train/2-2": 3.408778667449951, "rewards_train/2-w": 1.5501267910003662, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.1856935024261475, "rewards_train/margins_1": 1.614473819732666, "rewards_train/margins_2": 1.858651876449585, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -121.30245971679688, "logps_train/policy_1_l": -113.83439636230469, "logps_train/policy_1_w": -96.39076232910156, "logps_train/policy_2_2": -93.14898681640625, "logps_train/policy_2_w": -137.197998046875, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.9275662899017334, "rewards_train/1-l": -2.2270922660827637, "rewards_train/1-w": 3.0453968048095703, "rewards_train/2-2": 2.276116371154785, "rewards_train/2-w": 1.2165294885635376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.272489070892334, "rewards_train/margins_1": 2.117830514907837, "rewards_train/margins_2": 1.0595868825912476, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -148.211181640625, "logps_train/policy_1_l": -150.01641845703125, "logps_train/policy_1_w": -110.84656524658203, "logps_train/policy_2_2": -111.07325744628906, "logps_train/policy_2_w": -146.0191192626953, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.513257384300232, "rewards_train/1-l": -2.5852365493774414, "rewards_train/1-w": 3.1161251068115234, "rewards_train/2-2": 3.2270498275756836, "rewards_train/2-w": 1.2418382167816162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.701361656188965, "rewards_train/margins_1": 1.6028677225112915, "rewards_train/margins_2": 1.9852116107940674, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -191.12820434570312, "logps_train/policy_1_l": -189.71881103515625, "logps_train/policy_1_w": -152.6315460205078, "logps_train/policy_2_2": -158.77691650390625, "logps_train/policy_2_w": -186.66357421875, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6133513450622559, "rewards_train/1-l": -2.54063081741333, "rewards_train/1-w": 3.7587199211120605, "rewards_train/2-2": 3.1144962310791016, "rewards_train/2-w": 2.263329029083252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.299350738525391, "rewards_train/margins_1": 2.1453685760498047, "rewards_train/margins_2": 0.8511672019958496, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -97.95567321777344, "logps_train/policy_1_l": -121.24303436279297, "logps_train/policy_1_w": -92.64832305908203, "logps_train/policy_2_2": -70.26386260986328, "logps_train/policy_2_w": -117.14431762695312, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.4934954643249512, "rewards_train/1-l": -1.6516475677490234, "rewards_train/1-w": 2.9429802894592285, "rewards_train/2-2": 2.540801525115967, "rewards_train/2-w": 1.8996310234069824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.594627857208252, "rewards_train/margins_1": 1.4494848251342773, "rewards_train/margins_2": 0.6411705017089844, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -121.3326416015625, "logps_train/policy_1_l": -145.05795288085938, "logps_train/policy_1_w": -118.83108520507812, "logps_train/policy_2_2": -101.54930877685547, "logps_train/policy_2_w": -144.5599365234375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.9222040176391602, "rewards_train/1-l": -2.2391934394836426, "rewards_train/1-w": 2.9467735290527344, "rewards_train/2-2": 2.697413206100464, "rewards_train/2-w": 1.7018184661865234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.185966968536377, "rewards_train/margins_1": 1.0245695114135742, "rewards_train/margins_2": 0.9955947399139404, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -89.88373565673828, "logps_train/policy_1_l": -78.38440704345703, "logps_train/policy_1_w": -58.99134826660156, "logps_train/policy_2_2": -73.5372314453125, "logps_train/policy_2_w": -75.2981185913086, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.8788141012191772, "rewards_train/1-l": -1.1869754791259766, "rewards_train/1-w": 1.9390490055084229, "rewards_train/2-2": 1.415222406387329, "rewards_train/2-w": 1.1418676376342773, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1260244846343994, "rewards_train/margins_1": 1.0602349042892456, "rewards_train/margins_2": 0.27335476875305176, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -125.14947509765625, "logps_train/policy_1_l": -157.16378784179688, "logps_train/policy_1_w": -86.72865295410156, "logps_train/policy_2_2": -100.867919921875, "logps_train/policy_2_w": -114.71299743652344, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.9528264999389648, "rewards_train/1-l": -3.2866921424865723, "rewards_train/1-w": 2.906431198120117, "rewards_train/2-2": 2.9268808364868164, "rewards_train/2-w": 1.6245988607406616, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1931233406066895, "rewards_train/margins_1": 0.9536046981811523, "rewards_train/margins_2": 1.3022819757461548, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -118.38463592529297, "logps_train/policy_1_l": -121.53496551513672, "logps_train/policy_1_w": -82.30132293701172, "logps_train/policy_2_2": -87.75312042236328, "logps_train/policy_2_w": -113.83070373535156, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.0255987644195557, "rewards_train/1-l": -1.9639463424682617, "rewards_train/1-w": 2.322993040084839, "rewards_train/2-2": 2.5403127670288086, "rewards_train/2-w": 1.277086853981018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.286939382553101, "rewards_train/margins_1": 1.2973942756652832, "rewards_train/margins_2": 1.2632259130477905, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -185.21807861328125, "logps_train/policy_1_l": -171.6171875, "logps_train/policy_1_w": -130.44244384765625, "logps_train/policy_2_2": -151.53302001953125, "logps_train/policy_2_w": -165.33404541015625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.8438162803649902, "rewards_train/1-l": -2.0117197036743164, "rewards_train/1-w": 3.504193067550659, "rewards_train/2-2": 3.9881033897399902, "rewards_train/2-w": 2.2302663326263428, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.515912771224976, "rewards_train/margins_1": 0.660376787185669, "rewards_train/margins_2": 1.7578370571136475, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -119.4256820678711, "logps_train/policy_1_l": -118.56749725341797, "logps_train/policy_1_w": -47.5426025390625, "logps_train/policy_2_2": -92.82356262207031, "logps_train/policy_2_w": -72.57275390625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 1.3027445077896118, "rewards_train/1-l": -2.124034881591797, "rewards_train/1-w": 2.5879273414611816, "rewards_train/2-2": 2.6137375831604004, "rewards_train/2-w": 1.663036823272705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7119622230529785, "rewards_train/margins_1": 1.2851828336715698, "rewards_train/margins_2": 0.9507007598876953, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -102.8184814453125, "logps_train/policy_1_l": -105.76678466796875, "logps_train/policy_1_w": -76.2275161743164, "logps_train/policy_2_2": -77.57441711425781, "logps_train/policy_2_w": -106.19570922851562, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.3119018077850342, "rewards_train/1-l": -2.0005555152893066, "rewards_train/1-w": 2.4659199714660645, "rewards_train/2-2": 2.2050578594207764, "rewards_train/2-w": 0.988241970539093, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.466475486755371, "rewards_train/margins_1": 1.1540181636810303, "rewards_train/margins_2": 1.2168158888816833, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -136.68641662597656, "logps_train/policy_1_l": -170.98721313476562, "logps_train/policy_1_w": -141.70748901367188, "logps_train/policy_2_2": -107.1588134765625, "logps_train/policy_2_w": -191.3895263671875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.5297958850860596, "rewards_train/1-l": -2.935244083404541, "rewards_train/1-w": 3.024564743041992, "rewards_train/2-2": 2.29154109954834, "rewards_train/2-w": 1.021202802658081, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.959808826446533, "rewards_train/margins_1": 1.4947688579559326, "rewards_train/margins_2": 1.2703382968902588, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -86.89717102050781, "logps_train/policy_1_l": -99.81489562988281, "logps_train/policy_1_w": -85.41675567626953, "logps_train/policy_2_2": -66.26287841796875, "logps_train/policy_2_w": -106.81288146972656, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.03606379032135, "rewards_train/1-l": -1.3387163877487183, "rewards_train/1-w": 2.164574384689331, "rewards_train/2-2": 2.127228260040283, "rewards_train/2-w": 1.357774257659912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5032907724380493, "rewards_train/margins_1": 1.128510594367981, "rewards_train/margins_2": 0.7694540023803711, "step": 461 }, { "epoch": 1.38, "learning_rate": 1.207468486105636e-06, "loss": 0.4503, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -152.78265380859375, "logps_train/policy_1_l": -122.55772399902344, "logps_train/policy_1_w": -85.2284927368164, "logps_train/policy_2_2": -110.02001953125, "logps_train/policy_2_w": -120.86091613769531, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9451732039451599, "rewards_train/1-l": -1.8857040405273438, "rewards_train/1-w": 2.337111711502075, "rewards_train/2-2": 2.616748809814453, "rewards_train/2-w": 1.0777757167816162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.222815752029419, "rewards_train/margins_1": 1.3919385075569153, "rewards_train/margins_2": 1.538973093032837, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -124.81082916259766, "logps_train/policy_1_l": -152.39486694335938, "logps_train/policy_1_w": -96.21015930175781, "logps_train/policy_2_2": -101.6025390625, "logps_train/policy_2_w": -118.6845703125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 2.0142295360565186, "rewards_train/1-l": -2.663313865661621, "rewards_train/1-w": 3.1821091175079346, "rewards_train/2-2": 2.954589366912842, "rewards_train/2-w": 2.1081061363220215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.845422983169556, "rewards_train/margins_1": 1.167879581451416, "rewards_train/margins_2": 0.8464832305908203, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -149.60032653808594, "logps_train/policy_1_l": -115.98199462890625, "logps_train/policy_1_w": -135.0755157470703, "logps_train/policy_2_2": -117.28138732910156, "logps_train/policy_2_w": -172.23126220703125, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8930914402008057, "rewards_train/1-l": -1.5705684423446655, "rewards_train/1-w": 3.472135543823242, "rewards_train/2-2": 3.414830446243286, "rewards_train/2-w": 1.8604671955108643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.042703986167908, "rewards_train/margins_1": 1.5790441036224365, "rewards_train/margins_2": 1.5543632507324219, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -66.09263610839844, "logps_train/policy_1_l": -82.40376281738281, "logps_train/policy_1_w": -52.35014343261719, "logps_train/policy_2_2": -55.65658187866211, "logps_train/policy_2_w": -69.86144256591797, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 1.1633926630020142, "rewards_train/1-l": -1.5060014724731445, "rewards_train/1-w": 2.236860752105713, "rewards_train/2-2": 1.8448889255523682, "rewards_train/2-w": 1.5853397846221924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7428622245788574, "rewards_train/margins_1": 1.0734680891036987, "rewards_train/margins_2": 0.2595491409301758, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -219.3472442626953, "logps_train/policy_1_l": -186.51626586914062, "logps_train/policy_1_w": -125.72454833984375, "logps_train/policy_2_2": -176.04287719726562, "logps_train/policy_2_w": -179.10769653320312, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6910576820373535, "rewards_train/1-l": -2.069789409637451, "rewards_train/1-w": 3.7875542640686035, "rewards_train/2-2": 3.8445403575897217, "rewards_train/2-w": 1.931417465209961, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.857343673706055, "rewards_train/margins_1": 2.09649658203125, "rewards_train/margins_2": 1.9131228923797607, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -128.30215454101562, "logps_train/policy_1_l": -161.1876678466797, "logps_train/policy_1_w": -72.63190460205078, "logps_train/policy_2_2": -97.48098754882812, "logps_train/policy_2_w": -100.5760498046875, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.460410237312317, "rewards_train/1-l": -1.5283360481262207, "rewards_train/1-w": 2.3512625694274902, "rewards_train/2-2": 2.6019015312194824, "rewards_train/2-w": 1.1068484783172607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.879598617553711, "rewards_train/margins_1": 0.8908523321151733, "rewards_train/margins_2": 1.4950530529022217, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -83.65092468261719, "logps_train/policy_1_l": -95.07431030273438, "logps_train/policy_1_w": -81.54066467285156, "logps_train/policy_2_2": -59.23504638671875, "logps_train/policy_2_w": -110.23539733886719, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.57025945186615, "rewards_train/1-l": -1.6033291816711426, "rewards_train/1-w": 2.434410810470581, "rewards_train/2-2": 2.5157532691955566, "rewards_train/2-w": 1.2032179832458496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.037739992141724, "rewards_train/margins_1": 0.8641513586044312, "rewards_train/margins_2": 1.312535285949707, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -73.5079574584961, "logps_train/policy_1_l": -64.11749267578125, "logps_train/policy_1_w": -86.49757385253906, "logps_train/policy_2_2": -52.20268630981445, "logps_train/policy_2_w": -121.95065307617188, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -50.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.6070168018341064, "rewards_train/1-l": -1.4326238632202148, "rewards_train/1-w": 3.2814931869506836, "rewards_train/2-2": 2.3484814167022705, "rewards_train/2-w": 1.2830603122711182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.714117050170898, "rewards_train/margins_1": 1.6744763851165771, "rewards_train/margins_2": 1.0654211044311523, "step": 462 }, { "epoch": 1.39, "logps_train/policy_1_2": -177.25228881835938, "logps_train/policy_1_l": -235.73611450195312, "logps_train/policy_1_w": -101.3880615234375, "logps_train/policy_2_2": -128.3749237060547, "logps_train/policy_2_w": -141.09524536132812, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.3997721672058105, "rewards_train/1-l": -2.484743356704712, "rewards_train/1-w": 2.9158811569213867, "rewards_train/2-2": 3.1984453201293945, "rewards_train/2-w": 2.030320644378662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.400624513626099, "rewards_train/margins_1": 1.5161089897155762, "rewards_train/margins_2": 1.1681246757507324, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -197.40594482421875, "logps_train/policy_1_l": -254.898193359375, "logps_train/policy_1_w": -137.1903533935547, "logps_train/policy_2_2": -166.7419891357422, "logps_train/policy_2_w": -176.59832763671875, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.4012019634246826, "rewards_train/1-l": -3.5984137058258057, "rewards_train/1-w": 3.4294023513793945, "rewards_train/2-2": 2.9238479137420654, "rewards_train/2-w": 2.3057913780212402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.0278160572052, "rewards_train/margins_1": 2.028200387954712, "rewards_train/margins_2": 0.6180565357208252, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -112.35611724853516, "logps_train/policy_1_l": -79.59799194335938, "logps_train/policy_1_w": -89.54347229003906, "logps_train/policy_2_2": -77.75859069824219, "logps_train/policy_2_w": -112.67560577392578, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.801498293876648, "rewards_train/1-l": -1.093490719795227, "rewards_train/1-w": 2.9808096885681152, "rewards_train/2-2": 2.085078239440918, "rewards_train/2-w": 1.943766713142395, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.074300408363342, "rewards_train/margins_1": 2.1793113946914673, "rewards_train/margins_2": 0.14131152629852295, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -90.14039611816406, "logps_train/policy_1_l": -77.82445526123047, "logps_train/policy_1_w": -62.77823257446289, "logps_train/policy_2_2": -73.86325073242188, "logps_train/policy_2_w": -79.14862060546875, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.8468976020812988, "rewards_train/1-l": -1.3572500944137573, "rewards_train/1-w": 2.408114433288574, "rewards_train/2-2": 2.6300809383392334, "rewards_train/2-w": 1.5992002487182617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7653645277023315, "rewards_train/margins_1": 0.5612168312072754, "rewards_train/margins_2": 1.0308806896209717, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -163.35621643066406, "logps_train/policy_1_l": -244.45440673828125, "logps_train/policy_1_w": -128.2237091064453, "logps_train/policy_2_2": -123.73299407958984, "logps_train/policy_2_w": -169.146240234375, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.7139880657196045, "rewards_train/1-l": -1.943878412246704, "rewards_train/1-w": 2.930753707885742, "rewards_train/2-2": 2.6513097286224365, "rewards_train/2-w": 1.497288703918457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.874632120132446, "rewards_train/margins_1": 1.2167656421661377, "rewards_train/margins_2": 1.1540210247039795, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -112.1268310546875, "logps_train/policy_1_l": -97.91758728027344, "logps_train/policy_1_w": -78.01493835449219, "logps_train/policy_2_2": -88.12339782714844, "logps_train/policy_2_w": -110.18107604980469, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 1.2396605014801025, "rewards_train/1-l": -1.776914358139038, "rewards_train/1-w": 1.9141312837600708, "rewards_train/2-2": 2.2610976696014404, "rewards_train/2-w": 0.6373614072799683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.691045641899109, "rewards_train/margins_1": 0.6744707822799683, "rewards_train/margins_2": 1.6237362623214722, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -173.24664306640625, "logps_train/policy_1_l": -154.35108947753906, "logps_train/policy_1_w": -140.66781616210938, "logps_train/policy_2_2": -132.23110961914062, "logps_train/policy_2_w": -186.76443481445312, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.140960693359375, "rewards_train/1-l": -1.9146020412445068, "rewards_train/1-w": 3.5722804069519043, "rewards_train/2-2": 3.323763847351074, "rewards_train/2-w": 2.0376205444335938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.486882448196411, "rewards_train/margins_1": 1.4313197135925293, "rewards_train/margins_2": 1.2861433029174805, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -130.72943115234375, "logps_train/policy_1_l": -121.49102783203125, "logps_train/policy_1_w": -157.15296936035156, "logps_train/policy_2_2": -106.61604309082031, "logps_train/policy_2_w": -186.57858276367188, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.3981497287750244, "rewards_train/1-l": -1.8424623012542725, "rewards_train/1-w": 3.8190789222717285, "rewards_train/2-2": 3.205583095550537, "rewards_train/2-w": 2.3421435356140137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.661541223526001, "rewards_train/margins_1": 1.420929193496704, "rewards_train/margins_2": 0.8634395599365234, "step": 463 }, { "epoch": 1.39, "learning_rate": 1.1863909176314855e-06, "loss": 0.4465, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -73.73262786865234, "logps_train/policy_1_l": -68.8421401977539, "logps_train/policy_1_w": -58.46581268310547, "logps_train/policy_2_2": -60.313480377197266, "logps_train/policy_2_w": -70.45082092285156, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 1.4033000469207764, "rewards_train/1-l": -1.342905879020691, "rewards_train/1-w": 1.946387767791748, "rewards_train/2-2": 1.771972417831421, "rewards_train/2-w": 1.332261323928833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.289293646812439, "rewards_train/margins_1": 0.5430877208709717, "rewards_train/margins_2": 0.4397110939025879, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -154.87738037109375, "logps_train/policy_1_l": -180.75827026367188, "logps_train/policy_1_w": -74.31159973144531, "logps_train/policy_2_2": -123.17459869384766, "logps_train/policy_2_w": -98.41551208496094, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.4966368675231934, "rewards_train/1-l": -3.0286107063293457, "rewards_train/1-w": 3.418449640274048, "rewards_train/2-2": 2.792306423187256, "rewards_train/2-w": 2.5313000679016113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.4470603466033936, "rewards_train/margins_1": 1.9218127727508545, "rewards_train/margins_2": 0.26100635528564453, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -149.28195190429688, "logps_train/policy_1_l": -185.10263061523438, "logps_train/policy_1_w": -92.16304016113281, "logps_train/policy_2_2": -115.88145446777344, "logps_train/policy_2_w": -119.25664520263672, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.8811793327331543, "rewards_train/1-l": -3.13995099067688, "rewards_train/1-w": 2.9688520431518555, "rewards_train/2-2": 2.9712295532226562, "rewards_train/2-w": 2.041522741317749, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.108803033828735, "rewards_train/margins_1": 1.0876727104187012, "rewards_train/margins_2": 0.9297068119049072, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -192.08444213867188, "logps_train/policy_1_l": -165.43531799316406, "logps_train/policy_1_w": -133.14895629882812, "logps_train/policy_2_2": -160.18772888183594, "logps_train/policy_2_w": -164.50955200195312, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9774932861328125, "rewards_train/1-l": -1.8766131401062012, "rewards_train/1-w": 3.1316120624542236, "rewards_train/2-2": 3.518726348876953, "rewards_train/2-w": 1.7246055603027344, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.008225202560425, "rewards_train/margins_1": 1.1541187763214111, "rewards_train/margins_2": 1.7941207885742188, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -223.86859130859375, "logps_train/policy_1_l": -312.59466552734375, "logps_train/policy_1_w": -181.94833374023438, "logps_train/policy_2_2": -187.56906127929688, "logps_train/policy_2_w": -231.66632080078125, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.008453369140625, "rewards_train/1-l": -3.1086854934692383, "rewards_train/1-w": 4.286417007446289, "rewards_train/2-2": 3.4555931091308594, "rewards_train/2-w": 2.478679656982422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.395102500915527, "rewards_train/margins_1": 2.277963638305664, "rewards_train/margins_2": 0.9769134521484375, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -128.68426513671875, "logps_train/policy_1_l": -193.6718292236328, "logps_train/policy_1_w": -87.82870483398438, "logps_train/policy_2_2": -109.48345947265625, "logps_train/policy_2_w": -106.52018737792969, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.9534478187561035, "rewards_train/1-l": -2.174799919128418, "rewards_train/1-w": 2.858144760131836, "rewards_train/2-2": 2.6317319869995117, "rewards_train/2-w": 2.201106548309326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.032944679260254, "rewards_train/margins_1": 0.9046969413757324, "rewards_train/margins_2": 0.43062543869018555, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -128.61473083496094, "logps_train/policy_1_l": -130.49752807617188, "logps_train/policy_1_w": -92.02030944824219, "logps_train/policy_2_2": -101.4352798461914, "logps_train/policy_2_w": -110.61799621582031, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 1.1021987199783325, "rewards_train/1-l": -1.5103001594543457, "rewards_train/1-w": 2.430781602859497, "rewards_train/2-2": 1.884890079498291, "rewards_train/2-w": 1.6788252592086792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9410817623138428, "rewards_train/margins_1": 1.3285828828811646, "rewards_train/margins_2": 0.20606482028961182, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -127.4490966796875, "logps_train/policy_1_l": -155.62026977539062, "logps_train/policy_1_w": -102.25225067138672, "logps_train/policy_2_2": -96.95233917236328, "logps_train/policy_2_w": -143.04312133789062, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.228527545928955, "rewards_train/1-l": -2.5598793029785156, "rewards_train/1-w": 2.831806182861328, "rewards_train/2-2": 2.6172657012939453, "rewards_train/2-w": 0.8597510457038879, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.391685485839844, "rewards_train/margins_1": 1.603278636932373, "rewards_train/margins_2": 1.7575146555900574, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -232.3153076171875, "logps_train/policy_1_l": -198.49026489257812, "logps_train/policy_1_w": -123.74154663085938, "logps_train/policy_2_2": -185.84832763671875, "logps_train/policy_2_w": -157.1746826171875, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -237.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 3.105968952178955, "rewards_train/1-l": -3.3523473739624023, "rewards_train/1-w": 3.978970527648926, "rewards_train/2-2": 5.115948677062988, "rewards_train/2-w": 2.482532501220703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.331317901611328, "rewards_train/margins_1": 0.8730015754699707, "rewards_train/margins_2": 2.633416175842285, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -135.31005859375, "logps_train/policy_1_l": -127.91118621826172, "logps_train/policy_1_w": -70.1863784790039, "logps_train/policy_2_2": -101.62602996826172, "logps_train/policy_2_w": -96.44151306152344, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 1.1674323081970215, "rewards_train/1-l": -1.8807668685913086, "rewards_train/1-w": 2.7782371044158936, "rewards_train/2-2": 2.6608335971832275, "rewards_train/2-w": 2.002722978591919, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.659003973007202, "rewards_train/margins_1": 1.610804796218872, "rewards_train/margins_2": 0.6581106185913086, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -208.1947479248047, "logps_train/policy_1_l": -164.21121215820312, "logps_train/policy_1_w": -120.94482421875, "logps_train/policy_2_2": -155.35740661621094, "logps_train/policy_2_w": -177.30484008789062, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.1598215103149414, "rewards_train/1-l": -2.223464250564575, "rewards_train/1-w": 3.8266115188598633, "rewards_train/2-2": 3.335352897644043, "rewards_train/2-w": 1.591391921043396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.0500757694244385, "rewards_train/margins_1": 2.666790008544922, "rewards_train/margins_2": 1.743960976600647, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -246.66665649414062, "logps_train/policy_1_l": -253.96728515625, "logps_train/policy_1_w": -224.73861694335938, "logps_train/policy_2_2": -196.46182250976562, "logps_train/policy_2_w": -291.08404541015625, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -239.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 2.655989170074463, "rewards_train/1-l": -3.1239755153656006, "rewards_train/1-w": 4.339223384857178, "rewards_train/2-2": 4.2671003341674805, "rewards_train/2-w": 1.7926688194274902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.463198900222778, "rewards_train/margins_1": 1.6832342147827148, "rewards_train/margins_2": 2.4744315147399902, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -259.4006652832031, "logps_train/policy_1_l": -239.513916015625, "logps_train/policy_1_w": -158.6549530029297, "logps_train/policy_2_2": -213.20411682128906, "logps_train/policy_2_w": -199.80799865722656, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -248.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.4036836624145508, "rewards_train/1-l": -3.3420157432556152, "rewards_train/1-w": 4.225130081176758, "rewards_train/2-2": 3.454587936401367, "rewards_train/2-w": 2.9504501819610596, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.567145824432373, "rewards_train/margins_1": 2.821446418762207, "rewards_train/margins_2": 0.5041377544403076, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -121.5334243774414, "logps_train/policy_1_l": -151.096435546875, "logps_train/policy_1_w": -128.9910888671875, "logps_train/policy_2_2": -81.19383239746094, "logps_train/policy_2_w": -189.79745483398438, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.0661890506744385, "rewards_train/1-l": -2.2741949558258057, "rewards_train/1-w": 3.0587048530578613, "rewards_train/2-2": 2.1696791648864746, "rewards_train/2-w": 0.5124418139457703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.332899808883667, "rewards_train/margins_1": 1.9925158023834229, "rewards_train/margins_2": 1.6572373509407043, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -143.99755859375, "logps_train/policy_1_l": -134.0701141357422, "logps_train/policy_1_w": -81.64130401611328, "logps_train/policy_2_2": -110.89132690429688, "logps_train/policy_2_w": -103.21395874023438, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 0.5902832746505737, "rewards_train/1-l": -1.5038385391235352, "rewards_train/1-w": 2.3292288780212402, "rewards_train/2-2": 1.9600856304168701, "rewards_train/2-w": 1.2450095415115356, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8330674171447754, "rewards_train/margins_1": 1.7389456033706665, "rewards_train/margins_2": 0.7150760889053345, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -153.9862060546875, "logps_train/policy_1_l": -153.87603759765625, "logps_train/policy_1_w": -117.67662811279297, "logps_train/policy_2_2": -121.33222961425781, "logps_train/policy_2_w": -156.66331481933594, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.5545047521591187, "rewards_train/1-l": -2.5540103912353516, "rewards_train/1-w": 3.0815558433532715, "rewards_train/2-2": 2.957402229309082, "rewards_train/2-w": 1.4211680889129639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.635566234588623, "rewards_train/margins_1": 1.5270510911941528, "rewards_train/margins_2": 1.5362341403961182, "step": 465 }, { "epoch": 1.4, "learning_rate": 1.1654415550880245e-06, "loss": 0.4108, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -200.07888793945312, "logps_train/policy_1_l": -209.5597686767578, "logps_train/policy_1_w": -145.5385284423828, "logps_train/policy_2_2": -154.76779174804688, "logps_train/policy_2_w": -204.237548828125, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.4503138065338135, "rewards_train/1-l": -2.779219150543213, "rewards_train/1-w": 3.3414597511291504, "rewards_train/2-2": 3.214236259460449, "rewards_train/2-w": 1.574683427810669, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.120678901672363, "rewards_train/margins_1": 1.891145944595337, "rewards_train/margins_2": 1.6395528316497803, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -170.5067596435547, "logps_train/policy_1_l": -128.1207733154297, "logps_train/policy_1_w": -137.89334106445312, "logps_train/policy_2_2": -122.833251953125, "logps_train/policy_2_w": -183.94247436523438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.7258864641189575, "rewards_train/1-l": -1.0025076866149902, "rewards_train/1-w": 3.70910382270813, "rewards_train/2-2": 3.565112590789795, "rewards_train/2-w": 1.721376657485962, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.71161150932312, "rewards_train/margins_1": 1.9832173585891724, "rewards_train/margins_2": 1.843735933303833, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -102.86595153808594, "logps_train/policy_1_l": -195.6485137939453, "logps_train/policy_1_w": -93.70429229736328, "logps_train/policy_2_2": -82.97273254394531, "logps_train/policy_2_w": -117.29194641113281, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 2.0126233100891113, "rewards_train/1-l": -3.121882915496826, "rewards_train/1-w": 2.5733208656311035, "rewards_train/2-2": 2.4741132259368896, "rewards_train/2-w": 1.5473684072494507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.69520378112793, "rewards_train/margins_1": 0.5606975555419922, "rewards_train/margins_2": 0.926744818687439, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -223.91595458984375, "logps_train/policy_1_l": -236.23052978515625, "logps_train/policy_1_w": -116.89511108398438, "logps_train/policy_2_2": -157.37548828125, "logps_train/policy_2_w": -162.80398559570312, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.8012747764587402, "rewards_train/1-l": -2.7558650970458984, "rewards_train/1-w": 3.3471107482910156, "rewards_train/2-2": 5.297852516174316, "rewards_train/2-w": 1.5143765211105347, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.102975845336914, "rewards_train/margins_1": 0.5458359718322754, "rewards_train/margins_2": 3.7834759950637817, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -139.4932098388672, "logps_train/policy_1_l": -163.6617431640625, "logps_train/policy_1_w": -99.1444320678711, "logps_train/policy_2_2": -107.23009490966797, "logps_train/policy_2_w": -150.13975524902344, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.852241039276123, "rewards_train/1-l": -2.1571402549743652, "rewards_train/1-w": 3.230283737182617, "rewards_train/2-2": 3.129333972930908, "rewards_train/2-w": 1.1891487836837769, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.387423992156982, "rewards_train/margins_1": 1.3780426979064941, "rewards_train/margins_2": 1.9401851892471313, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -245.76715087890625, "logps_train/policy_1_l": -287.3658142089844, "logps_train/policy_1_w": -144.59808349609375, "logps_train/policy_2_2": -194.45751953125, "logps_train/policy_2_w": -203.59710693359375, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.992033839225769, "rewards_train/1-l": -2.861581802368164, "rewards_train/1-w": 4.152690887451172, "rewards_train/2-2": 4.085498809814453, "rewards_train/2-w": 2.1652889251708984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.014272689819336, "rewards_train/margins_1": 2.160657048225403, "rewards_train/margins_2": 1.9202098846435547, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -123.50431823730469, "logps_train/policy_1_l": -204.57440185546875, "logps_train/policy_1_w": -132.1864776611328, "logps_train/policy_2_2": -93.727783203125, "logps_train/policy_2_w": -168.5592498779297, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 2.4151930809020996, "rewards_train/1-l": -2.551189661026001, "rewards_train/1-w": 3.7376017570495605, "rewards_train/2-2": 3.5178475379943848, "rewards_train/2-w": 2.237825870513916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.2887914180755615, "rewards_train/margins_1": 1.322408676147461, "rewards_train/margins_2": 1.2800216674804688, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -139.77154541015625, "logps_train/policy_1_l": -195.77032470703125, "logps_train/policy_1_w": -115.31279754638672, "logps_train/policy_2_2": -113.4148178100586, "logps_train/policy_2_w": -146.8031005859375, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.2505788803100586, "rewards_train/1-l": -2.2433409690856934, "rewards_train/1-w": 3.715595006942749, "rewards_train/2-2": 2.188596725463867, "rewards_train/2-w": 2.7400026321411133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.958935976028442, "rewards_train/margins_1": 2.4650161266326904, "rewards_train/margins_2": -0.5514059066772461, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -152.677490234375, "logps_train/policy_1_l": -169.78538513183594, "logps_train/policy_1_w": -113.46044158935547, "logps_train/policy_2_2": -108.70047760009766, "logps_train/policy_2_w": -157.85801696777344, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.876001238822937, "rewards_train/1-l": -2.0055887699127197, "rewards_train/1-w": 3.121143341064453, "rewards_train/2-2": 2.57682728767395, "rewards_train/2-w": 1.0345102548599243, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.126732110977173, "rewards_train/margins_1": 2.245142102241516, "rewards_train/margins_2": 1.5423170328140259, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -184.91192626953125, "logps_train/policy_1_l": -108.09806823730469, "logps_train/policy_1_w": -102.37164306640625, "logps_train/policy_2_2": -149.53607177734375, "logps_train/policy_2_w": -138.39285278320312, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.1353697776794434, "rewards_train/1-l": -1.571915626525879, "rewards_train/1-w": 2.8456478118896484, "rewards_train/2-2": 2.7090883255004883, "rewards_train/2-w": 1.4700891971588135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.417563438415527, "rewards_train/margins_1": 1.710278034210205, "rewards_train/margins_2": 1.2389991283416748, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -173.54173278808594, "logps_train/policy_1_l": -263.5985107421875, "logps_train/policy_1_w": -110.5263671875, "logps_train/policy_2_2": -145.6219482421875, "logps_train/policy_2_w": -150.24111938476562, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -243.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 2.0337181091308594, "rewards_train/1-l": -2.0737171173095703, "rewards_train/1-w": 3.3270509243011475, "rewards_train/2-2": 3.2174935340881348, "rewards_train/2-w": 1.8930745124816895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.400768041610718, "rewards_train/margins_1": 1.293332815170288, "rewards_train/margins_2": 1.3244190216064453, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -155.5494842529297, "logps_train/policy_1_l": -196.80194091796875, "logps_train/policy_1_w": -113.86888122558594, "logps_train/policy_2_2": -126.22080993652344, "logps_train/policy_2_w": -149.9606475830078, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.288801670074463, "rewards_train/1-l": -2.325505256652832, "rewards_train/1-w": 3.1787376403808594, "rewards_train/2-2": 3.559169292449951, "rewards_train/2-w": 2.1945605278015137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.504242897033691, "rewards_train/margins_1": 0.8899359703063965, "rewards_train/margins_2": 1.3646087646484375, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -181.0850372314453, "logps_train/policy_1_l": -169.663818359375, "logps_train/policy_1_w": -140.32147216796875, "logps_train/policy_2_2": -136.03762817382812, "logps_train/policy_2_w": -176.69937133789062, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.4852468967437744, "rewards_train/1-l": -3.1150155067443848, "rewards_train/1-w": 3.252228260040283, "rewards_train/2-2": 3.214792490005493, "rewards_train/2-w": 1.3956866264343262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.367243766784668, "rewards_train/margins_1": 1.7669813632965088, "rewards_train/margins_2": 1.819105863571167, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -106.59024810791016, "logps_train/policy_1_l": -98.60231018066406, "logps_train/policy_1_w": -104.87590026855469, "logps_train/policy_2_2": -86.6607894897461, "logps_train/policy_2_w": -138.81336975097656, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.695662498474121, "rewards_train/1-l": -1.7776918411254883, "rewards_train/1-w": 3.0111608505249023, "rewards_train/2-2": 2.226499557495117, "rewards_train/2-w": 1.4224627017974854, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.788852691650391, "rewards_train/margins_1": 1.3154983520507812, "rewards_train/margins_2": 0.8040368556976318, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -171.3066864013672, "logps_train/policy_1_l": -208.7854461669922, "logps_train/policy_1_w": -135.79100036621094, "logps_train/policy_2_2": -137.7781982421875, "logps_train/policy_2_w": -170.33465576171875, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.6802688837051392, "rewards_train/1-l": -3.434403896331787, "rewards_train/1-w": 3.62363338470459, "rewards_train/2-2": 3.0872201919555664, "rewards_train/2-w": 1.9520820379257202, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.058037281036377, "rewards_train/margins_1": 1.9433645009994507, "rewards_train/margins_2": 1.1351381540298462, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -174.97686767578125, "logps_train/policy_1_l": -186.60699462890625, "logps_train/policy_1_w": -107.93984985351562, "logps_train/policy_2_2": -130.9645538330078, "logps_train/policy_2_w": -148.65811157226562, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.9304382801055908, "rewards_train/1-l": -2.1216373443603516, "rewards_train/1-w": 2.8755459785461426, "rewards_train/2-2": 2.4426074028015137, "rewards_train/2-w": 1.7193455696105957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.997183322906494, "rewards_train/margins_1": 1.9451076984405518, "rewards_train/margins_2": 0.723261833190918, "step": 467 }, { "epoch": 1.4, "learning_rate": 1.1446224430961037e-06, "loss": 0.4665, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -219.57559204101562, "logps_train/policy_1_l": -215.42344665527344, "logps_train/policy_1_w": -174.25833129882812, "logps_train/policy_2_2": -164.68841552734375, "logps_train/policy_2_w": -222.6310577392578, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.1736907958984375, "rewards_train/1-l": -1.8485941886901855, "rewards_train/1-w": 4.374167442321777, "rewards_train/2-2": 3.7874083518981934, "rewards_train/2-w": 2.42439341545105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.222761631011963, "rewards_train/margins_1": 3.20047664642334, "rewards_train/margins_2": 1.3630149364471436, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -162.48471069335938, "logps_train/policy_1_l": -215.06800842285156, "logps_train/policy_1_w": -138.65872192382812, "logps_train/policy_2_2": -121.21841430664062, "logps_train/policy_2_w": -182.7987060546875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.9515290260314941, "rewards_train/1-l": -3.291567087173462, "rewards_train/1-w": 3.509908437728882, "rewards_train/2-2": 3.68440842628479, "rewards_train/2-w": 1.758800983428955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.801475524902344, "rewards_train/margins_1": 1.5583794116973877, "rewards_train/margins_2": 1.925607442855835, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -138.95993041992188, "logps_train/policy_1_l": -177.68724060058594, "logps_train/policy_1_w": -146.71200561523438, "logps_train/policy_2_2": -98.05158996582031, "logps_train/policy_2_w": -196.03237915039062, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.5805705785751343, "rewards_train/1-l": -1.8282941579818726, "rewards_train/1-w": 3.514737129211426, "rewards_train/2-2": 2.738981246948242, "rewards_train/2-w": 1.4217617511749268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.343031287193298, "rewards_train/margins_1": 1.9341665506362915, "rewards_train/margins_2": 1.3172194957733154, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -115.99996948242188, "logps_train/policy_1_l": -136.44754028320312, "logps_train/policy_1_w": -148.29685974121094, "logps_train/policy_2_2": -91.326416015625, "logps_train/policy_2_w": -182.28643798828125, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.8625024557113647, "rewards_train/1-l": -2.1545186042785645, "rewards_train/1-w": 3.016016960144043, "rewards_train/2-2": 2.956420660018921, "rewards_train/2-w": 1.3920601606369019, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.170535564422607, "rewards_train/margins_1": 1.1535145044326782, "rewards_train/margins_2": 1.564360499382019, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -177.0257568359375, "logps_train/policy_1_l": -126.48773956298828, "logps_train/policy_1_w": -104.7479476928711, "logps_train/policy_2_2": -136.60855102539062, "logps_train/policy_2_w": -148.42552185058594, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.0224244594573975, "rewards_train/1-l": -1.7405709028244019, "rewards_train/1-w": 3.3002054691314697, "rewards_train/2-2": 3.401644706726074, "rewards_train/2-w": 1.7230732440948486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.040776371955872, "rewards_train/margins_1": 1.2777810096740723, "rewards_train/margins_2": 1.6785714626312256, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -150.70184326171875, "logps_train/policy_1_l": -181.0338134765625, "logps_train/policy_1_w": -115.47898864746094, "logps_train/policy_2_2": -112.95559692382812, "logps_train/policy_2_w": -155.55604553222656, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.3678042888641357, "rewards_train/1-l": -1.6623656749725342, "rewards_train/1-w": 2.9255387783050537, "rewards_train/2-2": 2.757467269897461, "rewards_train/2-w": 1.368614912033081, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.587904453277588, "rewards_train/margins_1": 1.557734489440918, "rewards_train/margins_2": 1.3888523578643799, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -103.1154556274414, "logps_train/policy_1_l": -162.37753295898438, "logps_train/policy_1_w": -146.143798828125, "logps_train/policy_2_2": -77.33296203613281, "logps_train/policy_2_w": -181.67898559570312, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.4533953666687012, "rewards_train/1-l": -2.0314536094665527, "rewards_train/1-w": 2.855933666229248, "rewards_train/2-2": 2.488041400909424, "rewards_train/2-w": 1.2805383205413818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.887387275695801, "rewards_train/margins_1": 1.4025382995605469, "rewards_train/margins_2": 1.207503080368042, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -179.93557739257812, "logps_train/policy_1_l": -265.9503173828125, "logps_train/policy_1_w": -175.69271850585938, "logps_train/policy_2_2": -151.57952880859375, "logps_train/policy_2_w": -217.61380004882812, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.656442165374756, "rewards_train/1-l": -3.168275833129883, "rewards_train/1-w": 4.060416221618652, "rewards_train/2-2": 3.8826732635498047, "rewards_train/2-w": 2.10033917427063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.228692054748535, "rewards_train/margins_1": 1.4039740562438965, "rewards_train/margins_2": 1.7823340892791748, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -133.58511352539062, "logps_train/policy_1_l": -108.10881042480469, "logps_train/policy_1_w": -128.84774780273438, "logps_train/policy_2_2": -108.59090423583984, "logps_train/policy_2_w": -153.4656219482422, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.8008642196655273, "rewards_train/1-l": -1.0718188285827637, "rewards_train/1-w": 2.8738179206848145, "rewards_train/2-2": 2.5455970764160156, "rewards_train/2-w": 1.7100772857666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.945636749267578, "rewards_train/margins_1": 1.072953701019287, "rewards_train/margins_2": 0.8355197906494141, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -89.26390075683594, "logps_train/policy_1_l": -161.82579040527344, "logps_train/policy_1_w": -71.47406005859375, "logps_train/policy_2_2": -67.35712432861328, "logps_train/policy_2_w": -90.63195037841797, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": 1.0204848051071167, "rewards_train/1-l": -3.698399543762207, "rewards_train/1-w": 1.9885313510894775, "rewards_train/2-2": 2.000225305557251, "rewards_train/2-w": 1.0813361406326294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.686930894851685, "rewards_train/margins_1": 0.9680465459823608, "rewards_train/margins_2": 0.9188891649246216, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -156.33554077148438, "logps_train/policy_1_l": -178.44993591308594, "logps_train/policy_1_w": -113.77864074707031, "logps_train/policy_2_2": -123.38291931152344, "logps_train/policy_2_w": -139.78182983398438, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.536757230758667, "rewards_train/1-l": -2.263353109359741, "rewards_train/1-w": 2.7260429859161377, "rewards_train/2-2": 2.9804575443267822, "rewards_train/2-w": 1.9600985050201416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.989396095275879, "rewards_train/margins_1": 1.1892857551574707, "rewards_train/margins_2": 1.0203590393066406, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -149.849365234375, "logps_train/policy_1_l": -170.42596435546875, "logps_train/policy_1_w": -127.74627685546875, "logps_train/policy_2_2": -102.66737365722656, "logps_train/policy_2_w": -168.53868103027344, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.0010004043579102, "rewards_train/1-l": -2.5160329341888428, "rewards_train/1-w": 3.8128719329833984, "rewards_train/2-2": 2.666074275970459, "rewards_train/2-w": 2.407069444656372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.328904867172241, "rewards_train/margins_1": 2.8118715286254883, "rewards_train/margins_2": 0.2590048313140869, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -192.8927459716797, "logps_train/policy_1_l": -205.9017333984375, "logps_train/policy_1_w": -132.32998657226562, "logps_train/policy_2_2": -152.99514770507812, "logps_train/policy_2_w": -178.5736541748047, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.7638492584228516, "rewards_train/1-l": -2.461658000946045, "rewards_train/1-w": 3.7443933486938477, "rewards_train/2-2": 3.555173635482788, "rewards_train/2-w": 2.006990432739258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.206051349639893, "rewards_train/margins_1": 1.980544090270996, "rewards_train/margins_2": 1.5481832027435303, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -192.44296264648438, "logps_train/policy_1_l": -142.77175903320312, "logps_train/policy_1_w": -119.84012603759766, "logps_train/policy_2_2": -152.9462890625, "logps_train/policy_2_w": -154.39813232421875, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.6732820272445679, "rewards_train/1-l": -1.6353309154510498, "rewards_train/1-w": 3.7066128253936768, "rewards_train/2-2": 3.3168954849243164, "rewards_train/2-w": 2.1539366245269775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.341943740844727, "rewards_train/margins_1": 2.033330798149109, "rewards_train/margins_2": 1.1629588603973389, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -138.2489776611328, "logps_train/policy_1_l": -161.7461700439453, "logps_train/policy_1_w": -107.399658203125, "logps_train/policy_2_2": -100.82199096679688, "logps_train/policy_2_w": -144.54507446289062, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3204150199890137, "rewards_train/1-l": -2.110163688659668, "rewards_train/1-w": 3.2272214889526367, "rewards_train/2-2": 2.6931910514831543, "rewards_train/2-w": 1.4908055067062378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.337385177612305, "rewards_train/margins_1": 1.906806468963623, "rewards_train/margins_2": 1.2023855447769165, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -145.00973510742188, "logps_train/policy_1_l": -147.57015991210938, "logps_train/policy_1_w": -91.79974365234375, "logps_train/policy_2_2": -118.84588623046875, "logps_train/policy_2_w": -106.44534301757812, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.188089370727539, "rewards_train/1-l": -2.0679538249969482, "rewards_train/1-w": 2.310650587081909, "rewards_train/2-2": 2.6083803176879883, "rewards_train/2-w": 1.9992159605026245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.378604412078857, "rewards_train/margins_1": 1.1225612163543701, "rewards_train/margins_2": 0.6091643571853638, "step": 469 }, { "epoch": 1.41, "learning_rate": 1.1239356135643544e-06, "loss": 0.3923, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -122.76472473144531, "logps_train/policy_1_l": -120.93128204345703, "logps_train/policy_1_w": -114.96760559082031, "logps_train/policy_2_2": -97.80599975585938, "logps_train/policy_2_w": -152.9969024658203, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.3094651699066162, "rewards_train/1-l": -0.998597264289856, "rewards_train/1-w": 2.8438644409179688, "rewards_train/2-2": 2.140493869781494, "rewards_train/2-w": 1.1768720149993896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8424617052078247, "rewards_train/margins_1": 1.5343992710113525, "rewards_train/margins_2": 0.9636218547821045, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -138.88885498046875, "logps_train/policy_1_l": -201.0391845703125, "logps_train/policy_1_w": -132.99636840820312, "logps_train/policy_2_2": -102.3880615234375, "logps_train/policy_2_w": -189.3876953125, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.1880674362182617, "rewards_train/1-l": -2.40313720703125, "rewards_train/1-w": 3.3316142559051514, "rewards_train/2-2": 2.3363897800445557, "rewards_train/2-w": 1.3956059217453003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.734751462936401, "rewards_train/margins_1": 2.1435468196868896, "rewards_train/margins_2": 0.9407838582992554, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -135.04701232910156, "logps_train/policy_1_l": -113.54499816894531, "logps_train/policy_1_w": -65.76564025878906, "logps_train/policy_2_2": -102.54075622558594, "logps_train/policy_2_w": -83.44112396240234, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.6890490055084229, "rewards_train/1-l": -1.6185622215270996, "rewards_train/1-w": 2.6671864986419678, "rewards_train/2-2": 2.939673900604248, "rewards_train/2-w": 1.8398725986480713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.285748720169067, "rewards_train/margins_1": 0.9781374931335449, "rewards_train/margins_2": 1.0998013019561768, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -144.29510498046875, "logps_train/policy_1_l": -100.32437133789062, "logps_train/policy_1_w": -39.65728759765625, "logps_train/policy_2_2": -94.4947738647461, "logps_train/policy_2_w": -73.28715515136719, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -60.75, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -81.5, "rewards_train/1-2": 0.6697079539299011, "rewards_train/1-l": -1.9933750629425049, "rewards_train/1-w": 2.112396240234375, "rewards_train/2-2": 2.795053243637085, "rewards_train/2-w": 0.8376908302307129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.10577130317688, "rewards_train/margins_1": 1.4426882863044739, "rewards_train/margins_2": 1.957362413406372, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -189.68637084960938, "logps_train/policy_1_l": -162.6277618408203, "logps_train/policy_1_w": -144.42225646972656, "logps_train/policy_2_2": -152.54788208007812, "logps_train/policy_2_w": -178.5043182373047, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.0407376289367676, "rewards_train/1-l": -1.7190264463424683, "rewards_train/1-w": 3.282773971557617, "rewards_train/2-2": 3.5452120304107666, "rewards_train/2-w": 1.949568271636963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.0018004179000854, "rewards_train/margins_1": 1.2420363426208496, "rewards_train/margins_2": 1.5956437587738037, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -179.7940673828125, "logps_train/policy_1_l": -197.55328369140625, "logps_train/policy_1_w": -101.20462036132812, "logps_train/policy_2_2": -149.41050720214844, "logps_train/policy_2_w": -133.84100341796875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.3940300941467285, "rewards_train/1-l": -2.4830641746520996, "rewards_train/1-w": 4.025631427764893, "rewards_train/2-2": 2.7151999473571777, "rewards_train/2-w": 2.9487130641937256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.508695602416992, "rewards_train/margins_1": 2.631601333618164, "rewards_train/margins_2": -0.23351311683654785, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -130.5402374267578, "logps_train/policy_1_l": -172.57740783691406, "logps_train/policy_1_w": -151.53184509277344, "logps_train/policy_2_2": -99.81723022460938, "logps_train/policy_2_w": -192.05239868164062, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.6116994619369507, "rewards_train/1-l": -2.1012461185455322, "rewards_train/1-w": 3.1124396324157715, "rewards_train/2-2": 2.3907387256622314, "rewards_train/2-w": 1.300424337387085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.213685750961304, "rewards_train/margins_1": 1.5007401704788208, "rewards_train/margins_2": 1.0903143882751465, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -92.95814514160156, "logps_train/policy_1_l": -101.69175720214844, "logps_train/policy_1_w": -70.84481811523438, "logps_train/policy_2_2": -69.58234405517578, "logps_train/policy_2_w": -102.04766845703125, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.2604358196258545, "rewards_train/1-l": -2.0984725952148438, "rewards_train/1-w": 2.388955593109131, "rewards_train/2-2": 2.223797082901001, "rewards_train/2-w": 1.6690616607666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.487428188323975, "rewards_train/margins_1": 1.1285197734832764, "rewards_train/margins_2": 0.5547354221343994, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -140.56280517578125, "logps_train/policy_1_l": -169.27273559570312, "logps_train/policy_1_w": -106.42916107177734, "logps_train/policy_2_2": -115.41346740722656, "logps_train/policy_2_w": -125.86382293701172, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.509344458580017, "rewards_train/1-l": -1.8261990547180176, "rewards_train/1-w": 3.533647060394287, "rewards_train/2-2": 2.503185272216797, "rewards_train/2-w": 2.6386172771453857, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.359846115112305, "rewards_train/margins_1": 2.02430260181427, "rewards_train/margins_2": -0.13543200492858887, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -121.82337951660156, "logps_train/policy_1_l": -106.9912109375, "logps_train/policy_1_w": -97.10222625732422, "logps_train/policy_2_2": -94.77055358886719, "logps_train/policy_2_w": -115.42285919189453, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.6325057744979858, "rewards_train/1-l": -1.004491925239563, "rewards_train/1-w": 2.4368479251861572, "rewards_train/2-2": 2.703413486480713, "rewards_train/2-w": 1.4338858127593994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.44133985042572, "rewards_train/margins_1": 0.8043421506881714, "rewards_train/margins_2": 1.2695276737213135, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -109.92803955078125, "logps_train/policy_1_l": -162.61007690429688, "logps_train/policy_1_w": -62.85712432861328, "logps_train/policy_2_2": -78.46891784667969, "logps_train/policy_2_w": -100.55496215820312, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 0.926728367805481, "rewards_train/1-l": -1.8631553649902344, "rewards_train/1-w": 2.7392873764038086, "rewards_train/2-2": 2.1492016315460205, "rewards_train/2-w": 1.6663786172866821, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.602442741394043, "rewards_train/margins_1": 1.8125590085983276, "rewards_train/margins_2": 0.4828230142593384, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -117.251708984375, "logps_train/policy_1_l": -95.0813217163086, "logps_train/policy_1_w": -67.73776245117188, "logps_train/policy_2_2": -94.45832061767578, "logps_train/policy_2_w": -83.16600036621094, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.2599855661392212, "rewards_train/1-l": -1.5059831142425537, "rewards_train/1-w": 2.1730988025665283, "rewards_train/2-2": 2.1084647178649902, "rewards_train/2-w": 1.3619157075881958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.679081916809082, "rewards_train/margins_1": 0.9131132364273071, "rewards_train/margins_2": 0.7465490102767944, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -135.3506622314453, "logps_train/policy_1_l": -159.84466552734375, "logps_train/policy_1_w": -108.65282440185547, "logps_train/policy_2_2": -100.19796752929688, "logps_train/policy_2_w": -162.40673828125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.2063400745391846, "rewards_train/1-l": -1.7305614948272705, "rewards_train/1-w": 2.409327268600464, "rewards_train/2-2": 2.295046806335449, "rewards_train/2-w": 0.6565930247306824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.139888763427734, "rewards_train/margins_1": 1.2029871940612793, "rewards_train/margins_2": 1.6384537816047668, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -176.31930541992188, "logps_train/policy_1_l": -171.04916381835938, "logps_train/policy_1_w": -142.07949829101562, "logps_train/policy_2_2": -141.0834503173828, "logps_train/policy_2_w": -169.68267822265625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.9227573871612549, "rewards_train/1-l": -1.4361662864685059, "rewards_train/1-w": 3.1498637199401855, "rewards_train/2-2": 3.363529682159424, "rewards_train/2-w": 2.0254812240600586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.586030006408691, "rewards_train/margins_1": 1.2271063327789307, "rewards_train/margins_2": 1.3380484580993652, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -149.97415161132812, "logps_train/policy_1_l": -121.82109069824219, "logps_train/policy_1_w": -103.38052368164062, "logps_train/policy_2_2": -115.22957611083984, "logps_train/policy_2_w": -146.9429931640625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.6838350296020508, "rewards_train/1-l": -1.4704389572143555, "rewards_train/1-w": 3.605795383453369, "rewards_train/2-2": 2.900479316711426, "rewards_train/2-w": 1.5766959190368652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.076234340667725, "rewards_train/margins_1": 1.9219603538513184, "rewards_train/margins_2": 1.3237833976745605, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -250.42962646484375, "logps_train/policy_1_l": -277.4654541015625, "logps_train/policy_1_w": -230.49964904785156, "logps_train/policy_2_2": -203.5650634765625, "logps_train/policy_2_w": -273.84820556640625, "logps_train/ref_1_2": -278.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -280.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -304.0, "rewards_train/1-2": 2.8445374965667725, "rewards_train/1-l": -3.690295696258545, "rewards_train/1-w": 4.95003604888916, "rewards_train/2-2": 4.612244129180908, "rewards_train/2-w": 3.0464284420013428, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.640331745147705, "rewards_train/margins_1": 2.1054985523223877, "rewards_train/margins_2": 1.5658156871795654, "step": 471 }, { "epoch": 1.41, "learning_rate": 1.1033830854908692e-06, "loss": 0.4445, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -238.149658203125, "logps_train/policy_1_l": -236.91616821289062, "logps_train/policy_1_w": -138.72232055664062, "logps_train/policy_2_2": -189.2154541015625, "logps_train/policy_2_w": -176.85450744628906, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.4940184354782104, "rewards_train/1-l": -2.2679848670959473, "rewards_train/1-w": 2.8574564456939697, "rewards_train/2-2": 3.3675172328948975, "rewards_train/2-w": 1.4348613023757935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.125441312789917, "rewards_train/margins_1": 1.3634380102157593, "rewards_train/margins_2": 1.932655930519104, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -156.76046752929688, "logps_train/policy_1_l": -228.8175506591797, "logps_train/policy_1_w": -174.26953125, "logps_train/policy_2_2": -132.15203857421875, "logps_train/policy_2_w": -209.50621032714844, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.1552042961120605, "rewards_train/1-l": -2.6028494834899902, "rewards_train/1-w": 3.748046398162842, "rewards_train/2-2": 3.1722958087921143, "rewards_train/2-w": 2.318129777908325, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.350895881652832, "rewards_train/margins_1": 1.5928421020507812, "rewards_train/margins_2": 0.8541660308837891, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -125.59490966796875, "logps_train/policy_1_l": -100.30059814453125, "logps_train/policy_1_w": -121.23908996582031, "logps_train/policy_2_2": -92.9531478881836, "logps_train/policy_2_w": -165.19342041015625, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.0108214616775513, "rewards_train/1-l": -1.0609195232391357, "rewards_train/1-w": 2.9854655265808105, "rewards_train/2-2": 2.3765597343444824, "rewards_train/2-w": 0.9900341033935547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.046385049819946, "rewards_train/margins_1": 1.9746440649032593, "rewards_train/margins_2": 1.3865256309509277, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -231.4229736328125, "logps_train/policy_1_l": -205.50564575195312, "logps_train/policy_1_w": -143.3667755126953, "logps_train/policy_2_2": -188.65277099609375, "logps_train/policy_2_w": -171.83770751953125, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.742078185081482, "rewards_train/1-l": -1.8818161487579346, "rewards_train/1-w": 3.288323402404785, "rewards_train/2-2": 3.2690987586975098, "rewards_train/2-w": 2.3568549156188965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.17013955116272, "rewards_train/margins_1": 1.5462452173233032, "rewards_train/margins_2": 0.9122438430786133, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -90.48329162597656, "logps_train/policy_1_l": -58.72068405151367, "logps_train/policy_1_w": -115.4207763671875, "logps_train/policy_2_2": -63.5808219909668, "logps_train/policy_2_w": -178.06094360351562, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -51.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.8094835877418518, "rewards_train/1-l": -0.7701151371002197, "rewards_train/1-w": 3.3962037563323975, "rewards_train/2-2": 1.6958237886428833, "rewards_train/2-w": 0.5220305919647217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.166318893432617, "rewards_train/margins_1": 2.5867201685905457, "rewards_train/margins_2": 1.1737931966781616, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -175.97103881835938, "logps_train/policy_1_l": -130.6944122314453, "logps_train/policy_1_w": -78.83472442626953, "logps_train/policy_2_2": -142.011474609375, "logps_train/policy_2_w": -111.72959899902344, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.8560211658477783, "rewards_train/1-l": -1.5249106884002686, "rewards_train/1-w": 2.667309045791626, "rewards_train/2-2": 3.389087438583374, "rewards_train/2-w": 1.6989145278930664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1922197341918945, "rewards_train/margins_1": 0.8112878799438477, "rewards_train/margins_2": 1.6901729106903076, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -131.6229248046875, "logps_train/policy_1_l": -149.32867431640625, "logps_train/policy_1_w": -116.6212158203125, "logps_train/policy_2_2": -88.10228729248047, "logps_train/policy_2_w": -168.60015869140625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.6923967003822327, "rewards_train/1-l": -1.4752874374389648, "rewards_train/1-w": 4.21756649017334, "rewards_train/2-2": 2.292896032333374, "rewards_train/2-w": 2.4071717262268066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.692853927612305, "rewards_train/margins_1": 3.525169789791107, "rewards_train/margins_2": -0.11427569389343262, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -212.60299682617188, "logps_train/policy_1_l": -236.28182983398438, "logps_train/policy_1_w": -189.887939453125, "logps_train/policy_2_2": -163.61038208007812, "logps_train/policy_2_w": -239.00807189941406, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.25219988822937, "rewards_train/1-l": -2.469588279724121, "rewards_train/1-w": 4.667455673217773, "rewards_train/2-2": 4.182712554931641, "rewards_train/2-w": 2.8054423332214355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.1370439529418945, "rewards_train/margins_1": 2.4152557849884033, "rewards_train/margins_2": 1.377270221710205, "step": 472 }, { "epoch": 1.42, "logps_train/policy_1_2": -167.1084442138672, "logps_train/policy_1_l": -168.26290893554688, "logps_train/policy_1_w": -86.05050659179688, "logps_train/policy_2_2": -129.77549743652344, "logps_train/policy_2_w": -124.5632553100586, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.8047804832458496, "rewards_train/1-l": -3.0919158458709717, "rewards_train/1-w": 3.082449197769165, "rewards_train/2-2": 3.0474510192871094, "rewards_train/2-w": 1.6405494213104248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.174365043640137, "rewards_train/margins_1": 1.2776687145233154, "rewards_train/margins_2": 1.4069015979766846, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -121.20249938964844, "logps_train/policy_1_l": -142.80831909179688, "logps_train/policy_1_w": -98.68946838378906, "logps_train/policy_2_2": -94.01959228515625, "logps_train/policy_2_w": -124.83251953125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.9359997510910034, "rewards_train/1-l": -1.6560282707214355, "rewards_train/1-w": 3.2404274940490723, "rewards_train/2-2": 3.0589776039123535, "rewards_train/2-w": 2.283935546875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.896455764770508, "rewards_train/margins_1": 1.3044277429580688, "rewards_train/margins_2": 0.7750420570373535, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -249.35498046875, "logps_train/policy_1_l": -191.74261474609375, "logps_train/policy_1_w": -153.448974609375, "logps_train/policy_2_2": -201.7115478515625, "logps_train/policy_2_w": -195.5506591796875, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -241.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.769970178604126, "rewards_train/1-l": -2.5709407329559326, "rewards_train/1-w": 3.8597898483276367, "rewards_train/2-2": 3.9530627727508545, "rewards_train/2-w": 2.396495819091797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.430730581283569, "rewards_train/margins_1": 2.0898196697235107, "rewards_train/margins_2": 1.5565669536590576, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -136.01708984375, "logps_train/policy_1_l": -78.68141174316406, "logps_train/policy_1_w": -49.95746612548828, "logps_train/policy_2_2": -95.02940368652344, "logps_train/policy_2_w": -75.59928131103516, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -56.25, "logps_train/ref_1_w": -71.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.0436036586761475, "rewards_train/1-l": -2.2521252632141113, "rewards_train/1-w": 2.1122612953186035, "rewards_train/2-2": 3.010340929031372, "rewards_train/2-w": 1.6580402851104736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.364386558532715, "rewards_train/margins_1": 1.068657636642456, "rewards_train/margins_2": 1.3523006439208984, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -165.87904357910156, "logps_train/policy_1_l": -244.30003356933594, "logps_train/policy_1_w": -73.32350158691406, "logps_train/policy_2_2": -121.04801940917969, "logps_train/policy_2_w": -114.57132720947266, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.1417829990386963, "rewards_train/1-l": -4.192502975463867, "rewards_train/1-w": 2.750462532043457, "rewards_train/2-2": 2.904573440551758, "rewards_train/2-w": 1.5913047790527344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.942965507507324, "rewards_train/margins_1": 1.6086795330047607, "rewards_train/margins_2": 1.3132686614990234, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -78.22584533691406, "logps_train/policy_1_l": -70.89115905761719, "logps_train/policy_1_w": -66.98693084716797, "logps_train/policy_2_2": -63.87718200683594, "logps_train/policy_2_w": -84.30730438232422, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -59.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 1.0392320156097412, "rewards_train/1-l": -1.1619676351547241, "rewards_train/1-w": 2.02786922454834, "rewards_train/2-2": 1.8429458141326904, "rewards_train/2-w": 1.2573553323745728, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.189836859703064, "rewards_train/margins_1": 0.9886372089385986, "rewards_train/margins_2": 0.5855904817581177, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -125.62767028808594, "logps_train/policy_1_l": -202.01251220703125, "logps_train/policy_1_w": -123.24845886230469, "logps_train/policy_2_2": -87.36994934082031, "logps_train/policy_2_w": -171.95130920410156, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.161353349685669, "rewards_train/1-l": -3.441193103790283, "rewards_train/1-w": 3.7564048767089844, "rewards_train/2-2": 2.766227960586548, "rewards_train/2-w": 2.1587750911712646, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.197597980499268, "rewards_train/margins_1": 2.5950515270233154, "rewards_train/margins_2": 0.6074528694152832, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -119.62451934814453, "logps_train/policy_1_l": -161.17822265625, "logps_train/policy_1_w": -125.16751098632812, "logps_train/policy_2_2": -89.28044128417969, "logps_train/policy_2_w": -162.20135498046875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 2.0266101360321045, "rewards_train/1-l": -2.1404786109924316, "rewards_train/1-w": 1.984615683555603, "rewards_train/2-2": 2.8985185623168945, "rewards_train/2-w": 0.7208803296089172, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.125094294548035, "rewards_train/margins_1": -0.041994452476501465, "rewards_train/margins_2": 2.1776382327079773, "step": 473 }, { "epoch": 1.42, "learning_rate": 1.0829668647661558e-06, "loss": 0.4665, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -122.66023254394531, "logps_train/policy_1_l": -71.62846374511719, "logps_train/policy_1_w": -73.05345153808594, "logps_train/policy_2_2": -85.2673110961914, "logps_train/policy_2_w": -111.00860595703125, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -60.25, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.8917891979217529, "rewards_train/1-l": -1.1314982175827026, "rewards_train/1-w": 2.367311477661133, "rewards_train/2-2": 2.5771751403808594, "rewards_train/2-w": 1.2866400480270386, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4988096952438354, "rewards_train/margins_1": 1.4755222797393799, "rewards_train/margins_2": 1.2905350923538208, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -93.96578216552734, "logps_train/policy_1_l": -83.47872161865234, "logps_train/policy_1_w": -65.20068359375, "logps_train/policy_2_2": -71.12199401855469, "logps_train/policy_2_w": -100.23207092285156, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.3065464496612549, "rewards_train/1-l": -1.2585654258728027, "rewards_train/1-w": 1.9877439737319946, "rewards_train/2-2": 1.99014413356781, "rewards_train/2-w": 0.7283551692962646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2463093996047974, "rewards_train/margins_1": 0.6811975240707397, "rewards_train/margins_2": 1.2617889642715454, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -145.48292541503906, "logps_train/policy_1_l": -84.64945983886719, "logps_train/policy_1_w": -72.96028900146484, "logps_train/policy_2_2": -109.23616027832031, "logps_train/policy_2_w": -104.91492462158203, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.922019362449646, "rewards_train/1-l": -0.6977584958076477, "rewards_train/1-w": 2.622525930404663, "rewards_train/2-2": 2.453728199005127, "rewards_train/2-w": 1.1348750591278076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.320284426212311, "rewards_train/margins_1": 1.700506567955017, "rewards_train/margins_2": 1.3188531398773193, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -229.38018798828125, "logps_train/policy_1_l": -211.52886962890625, "logps_train/policy_1_w": -173.47650146484375, "logps_train/policy_2_2": -192.8096466064453, "logps_train/policy_2_w": -225.98422241210938, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 2.5182318687438965, "rewards_train/1-l": -2.3504467010498047, "rewards_train/1-w": 4.063287734985352, "rewards_train/2-2": 4.276846885681152, "rewards_train/2-w": 2.076578378677368, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.413734436035156, "rewards_train/margins_1": 1.545055866241455, "rewards_train/margins_2": 2.200268507003784, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -211.16107177734375, "logps_train/policy_1_l": -265.0247497558594, "logps_train/policy_1_w": -140.84991455078125, "logps_train/policy_2_2": -172.22042846679688, "logps_train/policy_2_w": -179.01097106933594, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 2.3276431560516357, "rewards_train/1-l": -2.6751298904418945, "rewards_train/1-w": 3.390789031982422, "rewards_train/2-2": 3.6967053413391113, "rewards_train/2-w": 2.1283957958221436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.065918922424316, "rewards_train/margins_1": 1.0631458759307861, "rewards_train/margins_2": 1.5683095455169678, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -128.04946899414062, "logps_train/policy_1_l": -134.11964416503906, "logps_train/policy_1_w": -165.21664428710938, "logps_train/policy_2_2": -95.49711608886719, "logps_train/policy_2_w": -195.87689208984375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.2973968982696533, "rewards_train/1-l": -1.899562120437622, "rewards_train/1-w": 3.579116106033325, "rewards_train/2-2": 2.693256378173828, "rewards_train/2-w": 2.3693408966064453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.478678226470947, "rewards_train/margins_1": 2.281719207763672, "rewards_train/margins_2": 0.3239154815673828, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -172.6077880859375, "logps_train/policy_1_l": -221.38705444335938, "logps_train/policy_1_w": -152.24319458007812, "logps_train/policy_2_2": -146.45774841308594, "logps_train/policy_2_w": -182.71539306640625, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 2.4423470497131348, "rewards_train/1-l": -2.128549098968506, "rewards_train/1-w": 3.497555732727051, "rewards_train/2-2": 3.565162181854248, "rewards_train/2-w": 2.0362730026245117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.626104831695557, "rewards_train/margins_1": 1.055208683013916, "rewards_train/margins_2": 1.5288891792297363, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -200.60462951660156, "logps_train/policy_1_l": -197.12176513671875, "logps_train/policy_1_w": -162.06326293945312, "logps_train/policy_2_2": -152.6864013671875, "logps_train/policy_2_w": -220.57669067382812, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 2.1434435844421387, "rewards_train/1-l": -1.8371762037277222, "rewards_train/1-w": 3.7592992782592773, "rewards_train/2-2": 3.5993289947509766, "rewards_train/2-w": 1.4267054796218872, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5964754819869995, "rewards_train/margins_1": 1.6158556938171387, "rewards_train/margins_2": 2.1726235151290894, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -204.04608154296875, "logps_train/policy_1_l": -189.0562744140625, "logps_train/policy_1_w": -129.4286346435547, "logps_train/policy_2_2": -159.25323486328125, "logps_train/policy_2_w": -164.01039123535156, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.853203296661377, "rewards_train/1-l": -2.257190227508545, "rewards_train/1-w": 3.9665114879608154, "rewards_train/2-2": 3.762176752090454, "rewards_train/2-w": 2.7208364009857178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.22370171546936, "rewards_train/margins_1": 2.1133081912994385, "rewards_train/margins_2": 1.0413403511047363, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -177.59518432617188, "logps_train/policy_1_l": -155.96182250976562, "logps_train/policy_1_w": -126.53976440429688, "logps_train/policy_2_2": -140.94754028320312, "logps_train/policy_2_w": -166.36508178710938, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5276392698287964, "rewards_train/1-l": -1.9158108234405518, "rewards_train/1-w": 3.083914041519165, "rewards_train/2-2": 3.064279556274414, "rewards_train/2-w": 1.5412250757217407, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.999724864959717, "rewards_train/margins_1": 1.5562747716903687, "rewards_train/margins_2": 1.5230544805526733, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -272.36590576171875, "logps_train/policy_1_l": -207.83135986328125, "logps_train/policy_1_w": -169.63751220703125, "logps_train/policy_2_2": -221.6246337890625, "logps_train/policy_2_w": -229.43756103515625, "logps_train/ref_1_2": -286.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -254.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.4399722814559937, "rewards_train/1-l": -1.6081345081329346, "rewards_train/1-w": 3.7464048862457275, "rewards_train/2-2": 3.1828489303588867, "rewards_train/2-w": 1.6265554428100586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.354539394378662, "rewards_train/margins_1": 2.306432604789734, "rewards_train/margins_2": 1.5562934875488281, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -277.9292297363281, "logps_train/policy_1_l": -192.7511444091797, "logps_train/policy_1_w": -173.58828735351562, "logps_train/policy_2_2": -211.92103576660156, "logps_train/policy_2_w": -246.52816772460938, "logps_train/ref_1_2": -292.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.5508270263671875, "rewards_train/1-l": -1.002192497253418, "rewards_train/1-w": 3.829061269760132, "rewards_train/2-2": 4.326646327972412, "rewards_train/2-w": 1.8370277881622314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.83125376701355, "rewards_train/margins_1": 2.2782342433929443, "rewards_train/margins_2": 2.4896185398101807, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -137.51638793945312, "logps_train/policy_1_l": -254.52369689941406, "logps_train/policy_1_w": -131.6701202392578, "logps_train/policy_2_2": -100.92399597167969, "logps_train/policy_2_w": -190.99896240234375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7428921461105347, "rewards_train/1-l": -2.8516855239868164, "rewards_train/1-w": 3.5251760482788086, "rewards_train/2-2": 2.8001790046691895, "rewards_train/2-w": 1.2790108919143677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.376861572265625, "rewards_train/margins_1": 1.782283902168274, "rewards_train/margins_2": 1.5211681127548218, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -131.85818481445312, "logps_train/policy_1_l": -142.8206024169922, "logps_train/policy_1_w": -116.41388702392578, "logps_train/policy_2_2": -101.84224700927734, "logps_train/policy_2_w": -159.5833740234375, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0684785842895508, "rewards_train/1-l": -2.259404182434082, "rewards_train/1-w": 3.4726736545562744, "rewards_train/2-2": 2.1862833499908447, "rewards_train/2-w": 1.8041622638702393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.7320778369903564, "rewards_train/margins_1": 2.4041950702667236, "rewards_train/margins_2": 0.38212108612060547, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -191.475341796875, "logps_train/policy_1_l": -207.18260192871094, "logps_train/policy_1_w": -137.2706756591797, "logps_train/policy_2_2": -146.30335998535156, "logps_train/policy_2_w": -185.44761657714844, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.8571538925170898, "rewards_train/1-l": -2.7249014377593994, "rewards_train/1-w": 3.5162923336029053, "rewards_train/2-2": 3.287632465362549, "rewards_train/2-w": 1.5681285858154297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.241193771362305, "rewards_train/margins_1": 1.6591384410858154, "rewards_train/margins_2": 1.7195038795471191, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -164.42584228515625, "logps_train/policy_1_l": -219.48345947265625, "logps_train/policy_1_w": -204.605712890625, "logps_train/policy_2_2": -141.37405395507812, "logps_train/policy_2_w": -233.0314483642578, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 2.8058526515960693, "rewards_train/1-l": -1.6733455657958984, "rewards_train/1-w": 4.27692985534668, "rewards_train/2-2": 3.4875941276550293, "rewards_train/2-w": 2.9890432357788086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.950275421142578, "rewards_train/margins_1": 1.4710772037506104, "rewards_train/margins_2": 0.4985508918762207, "step": 475 }, { "epoch": 1.43, "learning_rate": 1.0626889439773661e-06, "loss": 0.4002, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -211.63677978515625, "logps_train/policy_1_l": -207.06094360351562, "logps_train/policy_1_w": -114.10699462890625, "logps_train/policy_2_2": -179.17044067382812, "logps_train/policy_2_w": -150.87083435058594, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.9519460201263428, "rewards_train/1-l": -3.1717190742492676, "rewards_train/1-w": 3.726799726486206, "rewards_train/2-2": 3.4485809803009033, "rewards_train/2-w": 2.300416946411133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.898518800735474, "rewards_train/margins_1": 1.7748537063598633, "rewards_train/margins_2": 1.1481640338897705, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -90.22001647949219, "logps_train/policy_1_l": -107.05180358886719, "logps_train/policy_1_w": -82.40446472167969, "logps_train/policy_2_2": -69.53868103027344, "logps_train/policy_2_w": -109.88612365722656, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.67487370967865, "rewards_train/1-l": -2.0804483890533447, "rewards_train/1-w": 3.0454912185668945, "rewards_train/2-2": 2.361757278442383, "rewards_train/2-w": 1.9258413314819336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.125939607620239, "rewards_train/margins_1": 1.3706175088882446, "rewards_train/margins_2": 0.4359159469604492, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -116.47904968261719, "logps_train/policy_1_l": -117.1156005859375, "logps_train/policy_1_w": -94.70389556884766, "logps_train/policy_2_2": -83.26138305664062, "logps_train/policy_2_w": -128.77603149414062, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.5958455801010132, "rewards_train/1-l": -1.8228880167007446, "rewards_train/1-w": 2.6905484199523926, "rewards_train/2-2": 1.8762056827545166, "rewards_train/2-w": 1.1223961114883423, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.513436436653137, "rewards_train/margins_1": 2.0947028398513794, "rewards_train/margins_2": 0.7538095712661743, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -147.15402221679688, "logps_train/policy_1_l": -91.18447875976562, "logps_train/policy_1_w": -98.84896850585938, "logps_train/policy_2_2": -117.33045196533203, "logps_train/policy_2_w": -134.8896942138672, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.6162378787994385, "rewards_train/1-l": -1.639541745185852, "rewards_train/1-w": 2.6115877628326416, "rewards_train/2-2": 1.93756103515625, "rewards_train/2-w": 1.0435497760772705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.251129508018494, "rewards_train/margins_1": 1.9953498840332031, "rewards_train/margins_2": 0.8940112590789795, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -122.3756103515625, "logps_train/policy_1_l": -80.84724426269531, "logps_train/policy_1_w": -89.94659423828125, "logps_train/policy_2_2": -94.73797607421875, "logps_train/policy_2_w": -122.10387420654297, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -64.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.5866575241088867, "rewards_train/1-l": -1.633162498474121, "rewards_train/1-w": 2.560810089111328, "rewards_train/2-2": 2.9387025833129883, "rewards_train/2-w": 0.8567999601364136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.193972587585449, "rewards_train/margins_1": 0.9741525650024414, "rewards_train/margins_2": 2.0819026231765747, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -109.8394546508789, "logps_train/policy_1_l": -111.18607330322266, "logps_train/policy_1_w": -120.52705383300781, "logps_train/policy_2_2": -86.50923156738281, "logps_train/policy_2_w": -145.76553344726562, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.9348046779632568, "rewards_train/1-l": -1.6512243747711182, "rewards_train/1-w": 3.843583345413208, "rewards_train/2-2": 2.849857807159424, "rewards_train/2-w": 2.7773525714874268, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.494807720184326, "rewards_train/margins_1": 1.9087786674499512, "rewards_train/margins_2": 0.07250523567199707, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -215.17218017578125, "logps_train/policy_1_l": -211.54934692382812, "logps_train/policy_1_w": -166.21408081054688, "logps_train/policy_2_2": -176.52857971191406, "logps_train/policy_2_w": -212.48841857910156, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 2.076530933380127, "rewards_train/1-l": -1.447122573852539, "rewards_train/1-w": 3.683279514312744, "rewards_train/2-2": 3.769017219543457, "rewards_train/2-w": 1.6356308460235596, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.130402088165283, "rewards_train/margins_1": 1.6067485809326172, "rewards_train/margins_2": 2.1333863735198975, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -105.96389770507812, "logps_train/policy_1_l": -129.42124938964844, "logps_train/policy_1_w": -67.11396026611328, "logps_train/policy_2_2": -75.57591247558594, "logps_train/policy_2_w": -86.62400817871094, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.2911109924316406, "rewards_train/1-l": -2.343296766281128, "rewards_train/1-w": 2.0569634437561035, "rewards_train/2-2": 2.1916279792785645, "rewards_train/2-w": 1.271583080291748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4002602100372314, "rewards_train/margins_1": 0.7658524513244629, "rewards_train/margins_2": 0.9200448989868164, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -121.69437408447266, "logps_train/policy_1_l": -200.43746948242188, "logps_train/policy_1_w": -127.04215240478516, "logps_train/policy_2_2": -91.02958679199219, "logps_train/policy_2_w": -163.5089111328125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.264937162399292, "rewards_train/1-l": -3.2130842208862305, "rewards_train/1-w": 2.7801594734191895, "rewards_train/2-2": 2.4900097846984863, "rewards_train/2-w": 1.4022347927093506, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.99324369430542, "rewards_train/margins_1": 1.5152223110198975, "rewards_train/margins_2": 1.0877749919891357, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -134.32827758789062, "logps_train/policy_1_l": -80.51307678222656, "logps_train/policy_1_w": -40.25325012207031, "logps_train/policy_2_2": -92.31175231933594, "logps_train/policy_2_w": -67.81855773925781, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -66.0, "logps_train/ref_1_w": -63.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -81.0, "rewards_train/1-2": 0.6531111598014832, "rewards_train/1-l": -1.4536511898040771, "rewards_train/1-w": 2.329362392425537, "rewards_train/2-2": 2.3457775115966797, "rewards_train/2-w": 1.350956916809082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7830135822296143, "rewards_train/margins_1": 1.676251232624054, "rewards_train/margins_2": 0.9948205947875977, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -126.84283447265625, "logps_train/policy_1_l": -233.77352905273438, "logps_train/policy_1_w": -121.64220428466797, "logps_train/policy_2_2": -101.75313568115234, "logps_train/policy_2_w": -161.08883666992188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.9789977073669434, "rewards_train/1-l": -2.7545981407165527, "rewards_train/1-w": 3.298865556716919, "rewards_train/2-2": 2.8051557540893555, "rewards_train/2-w": 1.8734397888183594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.053463697433472, "rewards_train/margins_1": 1.3198678493499756, "rewards_train/margins_2": 0.9317159652709961, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -135.79351806640625, "logps_train/policy_1_l": -227.46627807617188, "logps_train/policy_1_w": -124.19454956054688, "logps_train/policy_2_2": -105.2021484375, "logps_train/policy_2_w": -162.4388427734375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.8206477165222168, "rewards_train/1-l": -2.1177210807800293, "rewards_train/1-w": 3.2207798957824707, "rewards_train/2-2": 2.8995120525360107, "rewards_train/2-w": 2.0100221633911133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.3385009765625, "rewards_train/margins_1": 1.400132179260254, "rewards_train/margins_2": 0.8894898891448975, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -185.85067749023438, "logps_train/policy_1_l": -127.412841796875, "logps_train/policy_1_w": -109.09629821777344, "logps_train/policy_2_2": -145.64108276367188, "logps_train/policy_2_w": -149.72885131835938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.821180820465088, "rewards_train/1-l": -1.899096965789795, "rewards_train/1-w": 3.1559953689575195, "rewards_train/2-2": 3.631204128265381, "rewards_train/2-w": 1.0864901542663574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.0550923347473145, "rewards_train/margins_1": 1.3348145484924316, "rewards_train/margins_2": 2.5447139739990234, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -196.529541015625, "logps_train/policy_1_l": -154.58697509765625, "logps_train/policy_1_w": -115.53524780273438, "logps_train/policy_2_2": -161.0018310546875, "logps_train/policy_2_w": -148.33941650390625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 3.0751724243164062, "rewards_train/1-l": -2.1249098777770996, "rewards_train/1-w": 3.414834976196289, "rewards_train/2-2": 4.477941513061523, "rewards_train/2-w": 2.3215270042419434, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.539744853973389, "rewards_train/margins_1": 0.3396625518798828, "rewards_train/margins_2": 2.15641450881958, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -213.43331909179688, "logps_train/policy_1_l": -203.2046661376953, "logps_train/policy_1_w": -151.5210418701172, "logps_train/policy_2_2": -160.2969512939453, "logps_train/policy_2_w": -211.31219482421875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.6472928524017334, "rewards_train/1-l": -2.124373197555542, "rewards_train/1-w": 5.161958694458008, "rewards_train/2-2": 3.748429775238037, "rewards_train/2-w": 2.7390942573547363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.28633189201355, "rewards_train/margins_1": 3.5146658420562744, "rewards_train/margins_2": 1.0093355178833008, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -196.75790405273438, "logps_train/policy_1_l": -206.63787841796875, "logps_train/policy_1_w": -163.05319213867188, "logps_train/policy_2_2": -164.47315979003906, "logps_train/policy_2_w": -194.3072509765625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 3.105459690093994, "rewards_train/1-l": -3.058905601501465, "rewards_train/1-w": 3.322805643081665, "rewards_train/2-2": 3.791550397872925, "rewards_train/2-w": 2.0097053050994873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.38171124458313, "rewards_train/margins_1": 0.2173459529876709, "rewards_train/margins_2": 1.7818450927734375, "step": 477 }, { "epoch": 1.43, "learning_rate": 1.0425513022138203e-06, "loss": 0.4204, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -142.51004028320312, "logps_train/policy_1_l": -237.67755126953125, "logps_train/policy_1_w": -107.68214416503906, "logps_train/policy_2_2": -124.01676940917969, "logps_train/policy_2_w": -135.67425537109375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.6286839246749878, "rewards_train/1-l": -3.959942579269409, "rewards_train/1-w": 3.4520978927612305, "rewards_train/2-2": 2.532698631286621, "rewards_train/2-w": 2.5013251304626465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.41204047203064, "rewards_train/margins_1": 1.8234139680862427, "rewards_train/margins_2": 0.03137350082397461, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -97.68680572509766, "logps_train/policy_1_l": -124.4931640625, "logps_train/policy_1_w": -140.7937774658203, "logps_train/policy_2_2": -65.71024322509766, "logps_train/policy_2_w": -188.7384490966797, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.3297572135925293, "rewards_train/1-l": -1.7672841548919678, "rewards_train/1-w": 3.5405447483062744, "rewards_train/2-2": 2.5781941413879395, "rewards_train/2-w": 1.0400216579437256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.307828903198242, "rewards_train/margins_1": 2.210787534713745, "rewards_train/margins_2": 1.5381724834442139, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -97.17469787597656, "logps_train/policy_1_l": -107.48988342285156, "logps_train/policy_1_w": -94.43378448486328, "logps_train/policy_2_2": -82.52735137939453, "logps_train/policy_2_w": -122.91642761230469, "logps_train/ref_1_2": -110.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.3379987478256226, "rewards_train/1-l": -1.3364883661270142, "rewards_train/1-w": 3.2363088130950928, "rewards_train/2-2": 1.9105461835861206, "rewards_train/2-w": 1.848982572555542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.572797179222107, "rewards_train/margins_1": 1.8983100652694702, "rewards_train/margins_2": 0.06156361103057861, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -179.1072540283203, "logps_train/policy_1_l": -186.05259704589844, "logps_train/policy_1_w": -114.10540771484375, "logps_train/policy_2_2": -137.22476196289062, "logps_train/policy_2_w": -145.35699462890625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.0658371448516846, "rewards_train/1-l": -1.8630731105804443, "rewards_train/1-w": 3.066802978515625, "rewards_train/2-2": 2.7380716800689697, "rewards_train/2-w": 2.003363847732544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.929876089096069, "rewards_train/margins_1": 2.0009658336639404, "rewards_train/margins_2": 0.7347078323364258, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -138.13429260253906, "logps_train/policy_1_l": -114.97476959228516, "logps_train/policy_1_w": -61.09943389892578, "logps_train/policy_2_2": -92.98078918457031, "logps_train/policy_2_w": -91.71704864501953, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 1.023289680480957, "rewards_train/1-l": -1.8920081853866577, "rewards_train/1-w": 2.850212574005127, "rewards_train/2-2": 2.7191085815429688, "rewards_train/2-w": 1.3775140047073364, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.742220759391785, "rewards_train/margins_1": 1.82692289352417, "rewards_train/margins_2": 1.3415945768356323, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -179.14743041992188, "logps_train/policy_1_l": -178.1027069091797, "logps_train/policy_1_w": -142.445556640625, "logps_train/policy_2_2": -134.4921875, "logps_train/policy_2_w": -201.3848419189453, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.9711947441101074, "rewards_train/1-l": -1.129216194152832, "rewards_train/1-w": 3.5132570266723633, "rewards_train/2-2": 3.13828182220459, "rewards_train/2-w": 1.450578212738037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.642473220825195, "rewards_train/margins_1": 1.5420622825622559, "rewards_train/margins_2": 1.6877036094665527, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -185.55126953125, "logps_train/policy_1_l": -183.52322387695312, "logps_train/policy_1_w": -123.91654968261719, "logps_train/policy_2_2": -147.42282104492188, "logps_train/policy_2_w": -158.79086303710938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 2.0210437774658203, "rewards_train/1-l": -2.274976968765259, "rewards_train/1-w": 3.604438543319702, "rewards_train/2-2": 3.699514389038086, "rewards_train/2-w": 2.0802884101867676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.879415512084961, "rewards_train/margins_1": 1.5833947658538818, "rewards_train/margins_2": 1.6192259788513184, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -55.83153533935547, "logps_train/policy_1_l": -65.06672668457031, "logps_train/policy_1_w": -35.819183349609375, "logps_train/policy_2_2": -40.917030334472656, "logps_train/policy_2_w": -50.10499954223633, "logps_train/ref_1_2": -66.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -55.5, "logps_train/ref_2_2": -57.5, "logps_train/ref_2_w": -61.0, "rewards_train/1-2": 1.0848150253295898, "rewards_train/1-l": -1.315119981765747, "rewards_train/1-w": 1.9805819988250732, "rewards_train/2-2": 1.6551718711853027, "rewards_train/2-w": 1.0699690580368042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2957019805908203, "rewards_train/margins_1": 0.8957669734954834, "rewards_train/margins_2": 0.5852028131484985, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -191.88925170898438, "logps_train/policy_1_l": -199.7164306640625, "logps_train/policy_1_w": -135.50750732421875, "logps_train/policy_2_2": -150.66978454589844, "logps_train/policy_2_w": -183.49362182617188, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 2.4142005443573, "rewards_train/1-l": -2.3778939247131348, "rewards_train/1-w": 3.555497884750366, "rewards_train/2-2": 3.786146640777588, "rewards_train/2-w": 1.9412627220153809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.933391809463501, "rewards_train/margins_1": 1.1412973403930664, "rewards_train/margins_2": 1.844883918762207, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -214.20480346679688, "logps_train/policy_1_l": -119.61566162109375, "logps_train/policy_1_w": -89.89103698730469, "logps_train/policy_2_2": -159.4963836669922, "logps_train/policy_2_w": -123.5134048461914, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.8420203924179077, "rewards_train/1-l": -2.050628900527954, "rewards_train/1-w": 3.074324131011963, "rewards_train/2-2": 2.9831738471984863, "rewards_train/2-w": 1.7790310382843018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.124953031539917, "rewards_train/margins_1": 2.232303738594055, "rewards_train/margins_2": 1.2041428089141846, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -196.62130737304688, "logps_train/policy_1_l": -202.3948516845703, "logps_train/policy_1_w": -179.328125, "logps_train/policy_2_2": -165.64035034179688, "logps_train/policy_2_w": -211.82449340820312, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.67536997795105, "rewards_train/1-l": -2.5676093101501465, "rewards_train/1-w": 4.123438835144043, "rewards_train/2-2": 3.942216157913208, "rewards_train/2-w": 2.542550563812256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.6910481452941895, "rewards_train/margins_1": 1.4480688571929932, "rewards_train/margins_2": 1.3996655941009521, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -165.7283172607422, "logps_train/policy_1_l": -206.38345336914062, "logps_train/policy_1_w": -132.2548370361328, "logps_train/policy_2_2": -130.2783966064453, "logps_train/policy_2_w": -175.92330932617188, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 2.3400590419769287, "rewards_train/1-l": -3.328383684158325, "rewards_train/1-w": 3.5995163917541504, "rewards_train/2-2": 3.3598551750183105, "rewards_train/2-w": 1.9701690673828125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.927900075912476, "rewards_train/margins_1": 1.2594573497772217, "rewards_train/margins_2": 1.389686107635498, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -120.45704650878906, "logps_train/policy_1_l": -164.6385955810547, "logps_train/policy_1_w": -105.40921020507812, "logps_train/policy_2_2": -100.7740707397461, "logps_train/policy_2_w": -132.29579162597656, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.951951265335083, "rewards_train/1-l": -1.8116862773895264, "rewards_train/1-w": 2.5536093711853027, "rewards_train/2-2": 2.516342878341675, "rewards_train/2-w": 1.657139778137207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.365295648574829, "rewards_train/margins_1": 0.6016581058502197, "rewards_train/margins_2": 0.8592031002044678, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -187.53782653808594, "logps_train/policy_1_l": -190.42884826660156, "logps_train/policy_1_w": -116.56311798095703, "logps_train/policy_2_2": -146.86106872558594, "logps_train/policy_2_w": -148.75491333007812, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.9774681329727173, "rewards_train/1-l": -1.7921051979064941, "rewards_train/1-w": 2.840172290802002, "rewards_train/2-2": 3.523268461227417, "rewards_train/2-w": 1.5225558280944824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.632277488708496, "rewards_train/margins_1": 0.8627041578292847, "rewards_train/margins_2": 2.0007126331329346, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -118.84892272949219, "logps_train/policy_1_l": -145.69024658203125, "logps_train/policy_1_w": -57.83708572387695, "logps_train/policy_2_2": -92.7578353881836, "logps_train/policy_2_w": -77.75775146484375, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.4588582515716553, "rewards_train/1-l": -2.299687623977661, "rewards_train/1-w": 2.0024242401123047, "rewards_train/2-2": 2.4246068000793457, "rewards_train/2-w": 1.5105526447296143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.302111864089966, "rewards_train/margins_1": 0.5435659885406494, "rewards_train/margins_2": 0.9140541553497314, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -84.51026153564453, "logps_train/policy_1_l": -85.0089111328125, "logps_train/policy_1_w": -76.67111206054688, "logps_train/policy_2_2": -66.84062957763672, "logps_train/policy_2_w": -93.89923858642578, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -110.5, "rewards_train/1-2": 1.7880361080169678, "rewards_train/1-l": -1.0403449535369873, "rewards_train/1-w": 2.4746859073638916, "rewards_train/2-2": 2.3792178630828857, "rewards_train/2-w": 1.6405452489852905, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.515030860900879, "rewards_train/margins_1": 0.6866497993469238, "rewards_train/margins_2": 0.7386726140975952, "step": 479 }, { "epoch": 1.44, "learning_rate": 1.0225559048738548e-06, "loss": 0.4085, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -178.81674194335938, "logps_train/policy_1_l": -209.33193969726562, "logps_train/policy_1_w": -135.9443817138672, "logps_train/policy_2_2": -132.35504150390625, "logps_train/policy_2_w": -188.28741455078125, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.7323881387710571, "rewards_train/1-l": -2.00116229057312, "rewards_train/1-w": 3.1344685554504395, "rewards_train/2-2": 3.1223087310791016, "rewards_train/2-w": 1.3946974277496338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.13563084602356, "rewards_train/margins_1": 1.4020804166793823, "rewards_train/margins_2": 1.7276113033294678, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -85.06698608398438, "logps_train/policy_1_l": -85.03935241699219, "logps_train/policy_1_w": -41.62138366699219, "logps_train/policy_2_2": -66.65951538085938, "logps_train/policy_2_w": -75.00395202636719, "logps_train/ref_1_2": -94.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -62.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.942129373550415, "rewards_train/1-l": -1.344560146331787, "rewards_train/1-w": 2.070674419403076, "rewards_train/2-2": 1.8411529064178467, "rewards_train/2-w": 0.7574175596237183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4152345657348633, "rewards_train/margins_1": 1.1285450458526611, "rewards_train/margins_2": 1.0837353467941284, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -100.48747253417969, "logps_train/policy_1_l": -87.00108337402344, "logps_train/policy_1_w": -86.64842987060547, "logps_train/policy_2_2": -75.05143737792969, "logps_train/policy_2_w": -126.34971618652344, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.1981275081634521, "rewards_train/1-l": -1.255577564239502, "rewards_train/1-w": 2.5890626907348633, "rewards_train/2-2": 2.325324773788452, "rewards_train/2-w": 0.9720600843429565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.8446402549743652, "rewards_train/margins_1": 1.3909351825714111, "rewards_train/margins_2": 1.3532646894454956, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -175.18826293945312, "logps_train/policy_1_l": -254.45431518554688, "logps_train/policy_1_w": -137.5647735595703, "logps_train/policy_2_2": -145.27505493164062, "logps_train/policy_2_w": -174.58355712890625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.440548300743103, "rewards_train/1-l": -1.9973853826522827, "rewards_train/1-w": 3.777897357940674, "rewards_train/2-2": 2.2413907051086426, "rewards_train/2-w": 2.5635180473327637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.7752827405929565, "rewards_train/margins_1": 2.337349057197571, "rewards_train/margins_2": -0.3221273422241211, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -88.25163269042969, "logps_train/policy_1_l": -110.6263656616211, "logps_train/policy_1_w": -35.24909591674805, "logps_train/policy_2_2": -65.1619873046875, "logps_train/policy_2_w": -52.86841583251953, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -50.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -62.0, "rewards_train/1-2": 0.9576497673988342, "rewards_train/1-l": -1.8046776056289673, "rewards_train/1-w": 1.4960377216339111, "rewards_train/2-2": 1.9213013648986816, "rewards_train/2-w": 0.9014394879341125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3007153272628784, "rewards_train/margins_1": 0.5383879542350769, "rewards_train/margins_2": 1.019861876964569, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -98.72146606445312, "logps_train/policy_1_l": -83.70751190185547, "logps_train/policy_1_w": -66.09944915771484, "logps_train/policy_2_2": -79.30436706542969, "logps_train/policy_2_w": -80.3237075805664, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 1.5200409889221191, "rewards_train/1-l": -1.6605221033096313, "rewards_train/1-w": 2.252164363861084, "rewards_train/2-2": 2.475813388824463, "rewards_train/2-w": 1.6117212772369385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9126864671707153, "rewards_train/margins_1": 0.7321233749389648, "rewards_train/margins_2": 0.8640921115875244, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -280.53875732421875, "logps_train/policy_1_l": -209.1448211669922, "logps_train/policy_1_w": -238.2640380859375, "logps_train/policy_2_2": -224.9484405517578, "logps_train/policy_2_w": -299.9955749511719, "logps_train/ref_1_2": -304.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -288.0, "logps_train/ref_2_2": -276.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 2.3554983139038086, "rewards_train/1-l": -2.2306935787200928, "rewards_train/1-w": 4.961094856262207, "rewards_train/2-2": 4.961406230926514, "rewards_train/2-w": 2.387942314147949, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.1917884349823, "rewards_train/margins_1": 2.6055965423583984, "rewards_train/margins_2": 2.5734639167785645, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -183.67434692382812, "logps_train/policy_1_l": -187.96566772460938, "logps_train/policy_1_w": -106.64883422851562, "logps_train/policy_2_2": -144.47808837890625, "logps_train/policy_2_w": -141.9766845703125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.7388155460357666, "rewards_train/1-l": -2.6450047492980957, "rewards_train/1-w": 3.008944511413574, "rewards_train/2-2": 3.5021915435791016, "rewards_train/2-w": 1.5788943767547607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.65394926071167, "rewards_train/margins_1": 1.2701289653778076, "rewards_train/margins_2": 1.9232971668243408, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -137.24185180664062, "logps_train/policy_1_l": -208.05740356445312, "logps_train/policy_1_w": -159.16058349609375, "logps_train/policy_2_2": -114.2035140991211, "logps_train/policy_2_w": -189.23387145996094, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.200814723968506, "rewards_train/1-l": -2.807302951812744, "rewards_train/1-w": 4.055815696716309, "rewards_train/2-2": 3.044492244720459, "rewards_train/2-w": 2.920363187789917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.863118648529053, "rewards_train/margins_1": 1.8550009727478027, "rewards_train/margins_2": 0.12412905693054199, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -230.12632751464844, "logps_train/policy_1_l": -198.45632934570312, "logps_train/policy_1_w": -201.54818725585938, "logps_train/policy_2_2": -191.33740234375, "logps_train/policy_2_w": -240.57327270507812, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -270.0, "rewards_train/1-2": 1.9311178922653198, "rewards_train/1-l": -1.449540138244629, "rewards_train/1-w": 4.299867630004883, "rewards_train/2-2": 3.273681163787842, "rewards_train/2-w": 2.9114208221435547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.749407768249512, "rewards_train/margins_1": 2.368749737739563, "rewards_train/margins_2": 0.3622603416442871, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -243.29116821289062, "logps_train/policy_1_l": -255.9972381591797, "logps_train/policy_1_w": -172.20233154296875, "logps_train/policy_2_2": -204.84889221191406, "logps_train/policy_2_w": -224.519287109375, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 2.4247875213623047, "rewards_train/1-l": -2.3012866973876953, "rewards_train/1-w": 3.843827724456787, "rewards_train/2-2": 4.220579147338867, "rewards_train/2-w": 2.034790515899658, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.145114421844482, "rewards_train/margins_1": 1.4190402030944824, "rewards_train/margins_2": 2.185788631439209, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -109.22775268554688, "logps_train/policy_1_l": -91.56819152832031, "logps_train/policy_1_w": -87.4521713256836, "logps_train/policy_2_2": -85.82633972167969, "logps_train/policy_2_w": -115.75318908691406, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.2069125175476074, "rewards_train/1-l": -0.9279131889343262, "rewards_train/1-w": 2.180173397064209, "rewards_train/2-2": 2.1462719440460205, "rewards_train/2-w": 0.8850330710411072, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.108086585998535, "rewards_train/margins_1": 0.9732608795166016, "rewards_train/margins_2": 1.2612388730049133, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -123.08332824707031, "logps_train/policy_1_l": -171.07737731933594, "logps_train/policy_1_w": -118.74810791015625, "logps_train/policy_2_2": -97.3368148803711, "logps_train/policy_2_w": -155.31578063964844, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.4236979484558105, "rewards_train/1-l": -2.962620258331299, "rewards_train/1-w": 2.234955310821533, "rewards_train/2-2": 2.420225143432617, "rewards_train/2-w": 0.8606094121932983, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.197575569152832, "rewards_train/margins_1": 0.8112573623657227, "rewards_train/margins_2": 1.5596157312393188, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -158.5316619873047, "logps_train/policy_1_l": -177.23126220703125, "logps_train/policy_1_w": -80.54417419433594, "logps_train/policy_2_2": -119.22703552246094, "logps_train/policy_2_w": -112.79651641845703, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.5343343019485474, "rewards_train/1-l": -2.861994743347168, "rewards_train/1-w": 2.354957342147827, "rewards_train/2-2": 2.9368667602539062, "rewards_train/2-w": 1.270348310470581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.216952085494995, "rewards_train/margins_1": 0.8206230401992798, "rewards_train/margins_2": 1.6665184497833252, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -85.44244384765625, "logps_train/policy_1_l": -102.30408477783203, "logps_train/policy_1_w": -57.44804000854492, "logps_train/policy_2_2": -68.556884765625, "logps_train/policy_2_w": -78.62181091308594, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.0106384754180908, "rewards_train/1-l": -1.499549150466919, "rewards_train/1-w": 2.5024614334106445, "rewards_train/2-2": 1.7384766340255737, "rewards_train/2-w": 1.8382091522216797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.0020105838775635, "rewards_train/margins_1": 1.4918229579925537, "rewards_train/margins_2": -0.09973251819610596, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -209.49578857421875, "logps_train/policy_1_l": -167.86851501464844, "logps_train/policy_1_w": -177.73025512695312, "logps_train/policy_2_2": -172.96652221679688, "logps_train/policy_2_w": -220.189697265625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.8140928745269775, "rewards_train/1-l": -2.096616744995117, "rewards_train/1-w": 4.559787273406982, "rewards_train/2-2": 3.806863784790039, "rewards_train/2-w": 2.763841152191162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.6564040184021, "rewards_train/margins_1": 2.745694398880005, "rewards_train/margins_2": 1.043022632598877, "step": 481 }, { "epoch": 1.44, "learning_rate": 1.002704703473e-06, "loss": 0.4922, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -118.9184341430664, "logps_train/policy_1_l": -187.07777404785156, "logps_train/policy_1_w": -108.68623352050781, "logps_train/policy_2_2": -92.06715393066406, "logps_train/policy_2_w": -153.95703125, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.2245628833770752, "rewards_train/1-l": -2.9480130672454834, "rewards_train/1-w": 2.776688575744629, "rewards_train/2-2": 2.150315761566162, "rewards_train/2-w": 1.193359136581421, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.724701642990112, "rewards_train/margins_1": 1.5521256923675537, "rewards_train/margins_2": 0.9569566249847412, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -173.64398193359375, "logps_train/policy_1_l": -184.3013916015625, "logps_train/policy_1_w": -195.31182861328125, "logps_train/policy_2_2": -131.56430053710938, "logps_train/policy_2_w": -254.139404296875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.4152891635894775, "rewards_train/1-l": -2.3207643032073975, "rewards_train/1-w": 3.8281922340393066, "rewards_train/2-2": 3.5357584953308105, "rewards_train/2-w": 1.292309284210205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.148956537246704, "rewards_train/margins_1": 1.412903070449829, "rewards_train/margins_2": 2.2434492111206055, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -110.23565673828125, "logps_train/policy_1_l": -102.82635498046875, "logps_train/policy_1_w": -74.6697769165039, "logps_train/policy_2_2": -81.50559997558594, "logps_train/policy_2_w": -106.64016723632812, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.9779973030090332, "rewards_train/1-l": -2.1595888137817383, "rewards_train/1-w": 2.4530906677246094, "rewards_train/2-2": 2.330690383911133, "rewards_train/2-w": 1.1180139780044556, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.612679481506348, "rewards_train/margins_1": 1.4750933647155762, "rewards_train/margins_2": 1.2126764059066772, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -180.4049072265625, "logps_train/policy_1_l": -149.0919952392578, "logps_train/policy_1_w": -243.04183959960938, "logps_train/policy_2_2": -149.397705078125, "logps_train/policy_2_w": -285.9659118652344, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -284.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 1.806383728981018, "rewards_train/1-l": -1.2615437507629395, "rewards_train/1-w": 4.085267543792725, "rewards_train/2-2": 3.2313232421875, "rewards_train/2-w": 2.2811429500579834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.346811294555664, "rewards_train/margins_1": 2.2788838148117065, "rewards_train/margins_2": 0.9501802921295166, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -132.6038055419922, "logps_train/policy_1_l": -148.07876586914062, "logps_train/policy_1_w": -95.61809539794922, "logps_train/policy_2_2": -99.69139099121094, "logps_train/policy_2_w": -117.08142852783203, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.696650505065918, "rewards_train/1-l": -1.5367825031280518, "rewards_train/1-w": 2.693659782409668, "rewards_train/2-2": 2.8035173416137695, "rewards_train/2-w": 1.8868768215179443, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.23044228553772, "rewards_train/margins_1": 0.99700927734375, "rewards_train/margins_2": 0.9166405200958252, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -238.98219299316406, "logps_train/policy_1_l": -255.55116271972656, "logps_train/policy_1_w": -161.04861450195312, "logps_train/policy_2_2": -199.21768188476562, "logps_train/policy_2_w": -211.25631713867188, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -234.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.9791247844696045, "rewards_train/1-l": -2.1947648525238037, "rewards_train/1-w": 3.520138740539551, "rewards_train/2-2": 3.559873104095459, "rewards_train/2-w": 1.6329622268676758, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.7149035930633545, "rewards_train/margins_1": 1.5410139560699463, "rewards_train/margins_2": 1.9269108772277832, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -174.6197509765625, "logps_train/policy_1_l": -168.21115112304688, "logps_train/policy_1_w": -76.51042938232422, "logps_train/policy_2_2": -146.02499389648438, "logps_train/policy_2_w": -99.81288146972656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.5028679370880127, "rewards_train/1-l": -2.923457622528076, "rewards_train/1-w": 3.110285758972168, "rewards_train/2-2": 2.8826568126678467, "rewards_train/2-w": 2.353085994720459, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.033743381500244, "rewards_train/margins_1": 1.6074178218841553, "rewards_train/margins_2": 0.5295708179473877, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -97.07395935058594, "logps_train/policy_1_l": -107.69987487792969, "logps_train/policy_1_w": -73.60971069335938, "logps_train/policy_2_2": -65.43125915527344, "logps_train/policy_2_w": -105.97454833984375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.4394797086715698, "rewards_train/1-l": -1.8098318576812744, "rewards_train/1-w": 3.1577789783477783, "rewards_train/2-2": 2.5146865844726562, "rewards_train/2-w": 1.8025448322296143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.967610836029053, "rewards_train/margins_1": 1.7182992696762085, "rewards_train/margins_2": 0.712141752243042, "step": 482 }, { "epoch": 1.45, "logps_train/policy_1_2": -158.39581298828125, "logps_train/policy_1_l": -189.6365203857422, "logps_train/policy_1_w": -133.6453094482422, "logps_train/policy_2_2": -123.93067169189453, "logps_train/policy_2_w": -172.1751251220703, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.4541682004928589, "rewards_train/1-l": -1.7706836462020874, "rewards_train/1-w": 3.3315632343292236, "rewards_train/2-2": 2.941307783126831, "rewards_train/2-w": 1.9074879884719849, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.102246880531311, "rewards_train/margins_1": 1.8773950338363647, "rewards_train/margins_2": 1.0338197946548462, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -103.9498291015625, "logps_train/policy_1_l": -105.20246124267578, "logps_train/policy_1_w": -73.45172119140625, "logps_train/policy_2_2": -80.25907135009766, "logps_train/policy_2_w": -101.78150177001953, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.3402705192565918, "rewards_train/1-l": -1.8616520166397095, "rewards_train/1-w": 2.7897889614105225, "rewards_train/2-2": 2.3904995918273926, "rewards_train/2-w": 1.4530999660491943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.651440978050232, "rewards_train/margins_1": 1.4495184421539307, "rewards_train/margins_2": 0.9373996257781982, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -128.9342041015625, "logps_train/policy_1_l": -142.03988647460938, "logps_train/policy_1_w": -97.94137573242188, "logps_train/policy_2_2": -103.60535430908203, "logps_train/policy_2_w": -126.49028778076172, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.4511103630065918, "rewards_train/1-l": -2.9239115715026855, "rewards_train/1-w": 3.0199246406555176, "rewards_train/2-2": 2.865440845489502, "rewards_train/2-w": 1.789252758026123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.943836212158203, "rewards_train/margins_1": 1.5688142776489258, "rewards_train/margins_2": 1.076188087463379, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -172.15499877929688, "logps_train/policy_1_l": -229.5345458984375, "logps_train/policy_1_w": -184.78933715820312, "logps_train/policy_2_2": -132.16004943847656, "logps_train/policy_2_w": -236.08920288085938, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -223.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.2313742637634277, "rewards_train/1-l": -2.744859218597412, "rewards_train/1-w": 3.8538784980773926, "rewards_train/2-2": 3.6269640922546387, "rewards_train/2-w": 1.7848291397094727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.598737716674805, "rewards_train/margins_1": 1.6225042343139648, "rewards_train/margins_2": 1.842134952545166, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -142.61587524414062, "logps_train/policy_1_l": -170.24432373046875, "logps_train/policy_1_w": -122.85357666015625, "logps_train/policy_2_2": -118.88299560546875, "logps_train/policy_2_w": -160.1438751220703, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.595445156097412, "rewards_train/1-l": -2.1836109161376953, "rewards_train/1-w": 3.5177671909332275, "rewards_train/2-2": 2.733185052871704, "rewards_train/2-w": 2.099675416946411, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.701378107070923, "rewards_train/margins_1": 1.9223220348358154, "rewards_train/margins_2": 0.633509635925293, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -243.97747802734375, "logps_train/policy_1_l": -235.21591186523438, "logps_train/policy_1_w": -177.5978240966797, "logps_train/policy_2_2": -202.28977966308594, "logps_train/policy_2_w": -219.3311767578125, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 3.3085014820098877, "rewards_train/1-l": -3.8090906143188477, "rewards_train/1-w": 3.9472484588623047, "rewards_train/2-2": 5.0710225105285645, "rewards_train/2-w": 2.3403191566467285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.756339073181152, "rewards_train/margins_1": 0.638746976852417, "rewards_train/margins_2": 2.730703353881836, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -99.59195709228516, "logps_train/policy_1_l": -78.88639068603516, "logps_train/policy_1_w": -83.77008056640625, "logps_train/policy_2_2": -62.539058685302734, "logps_train/policy_2_w": -140.06480407714844, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.2720541954040527, "rewards_train/1-l": -0.9187172651290894, "rewards_train/1-w": 3.2573673725128174, "rewards_train/2-2": 2.3507814407348633, "rewards_train/2-w": 0.5919574499130249, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.176084637641907, "rewards_train/margins_1": 1.9853131771087646, "rewards_train/margins_2": 1.7588239908218384, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -153.02645874023438, "logps_train/policy_1_l": -149.62120056152344, "logps_train/policy_1_w": -112.44725036621094, "logps_train/policy_2_2": -109.96212768554688, "logps_train/policy_2_w": -140.5420379638672, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.7801652550697327, "rewards_train/1-l": -2.1699323654174805, "rewards_train/1-w": 3.294337749481201, "rewards_train/2-2": 2.7842559814453125, "rewards_train/2-w": 2.0582966804504395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.464270114898682, "rewards_train/margins_1": 2.5141724944114685, "rewards_train/margins_2": 0.725959300994873, "step": 483 }, { "epoch": 1.45, "learning_rate": 9.829996354535173e-07, "loss": 0.3834, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -125.99737548828125, "logps_train/policy_1_l": -90.9971694946289, "logps_train/policy_1_w": -73.32574462890625, "logps_train/policy_2_2": -93.63346099853516, "logps_train/policy_2_w": -102.977783203125, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.9035343527793884, "rewards_train/1-l": -1.3034765720367432, "rewards_train/1-w": 2.0988218784332275, "rewards_train/2-2": 2.2719080448150635, "rewards_train/2-w": 1.1835696697235107, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4022984504699707, "rewards_train/margins_1": 1.1952875256538391, "rewards_train/margins_2": 1.0883383750915527, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -130.15969848632812, "logps_train/policy_1_l": -197.9806671142578, "logps_train/policy_1_w": -146.31753540039062, "logps_train/policy_2_2": -97.50926971435547, "logps_train/policy_2_w": -194.30477905273438, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.3355937004089355, "rewards_train/1-l": -3.2324419021606445, "rewards_train/1-w": 4.151059627532959, "rewards_train/2-2": 2.9631354808807373, "rewards_train/2-w": 1.9195228815078735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.3835015296936035, "rewards_train/margins_1": 1.8154659271240234, "rewards_train/margins_2": 1.0436125993728638, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -137.05555725097656, "logps_train/policy_1_l": -184.5899658203125, "logps_train/policy_1_w": -131.4522705078125, "logps_train/policy_2_2": -92.6336441040039, "logps_train/policy_2_w": -193.82635498046875, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.0444440841674805, "rewards_train/1-l": -2.46212100982666, "rewards_train/1-w": 3.9922728538513184, "rewards_train/2-2": 3.2936670780181885, "rewards_train/2-w": 1.2611143589019775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.4543938636779785, "rewards_train/margins_1": 1.947828769683838, "rewards_train/margins_2": 2.032552719116211, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -260.9385070800781, "logps_train/policy_1_l": -207.34365844726562, "logps_train/policy_1_w": -136.73095703125, "logps_train/policy_2_2": -216.81192016601562, "logps_train/policy_2_w": -171.01268005371094, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -254.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.8373994827270508, "rewards_train/1-l": -2.4445223808288574, "rewards_train/1-w": 3.617530345916748, "rewards_train/2-2": 3.6063075065612793, "rewards_train/2-w": 2.4721693992614746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.0620527267456055, "rewards_train/margins_1": 1.7801308631896973, "rewards_train/margins_2": 1.1341381072998047, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -199.0567169189453, "logps_train/policy_1_l": -262.0972900390625, "logps_train/policy_1_w": -229.19155883789062, "logps_train/policy_2_2": -166.61056518554688, "logps_train/policy_2_w": -265.6119079589844, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 2.578702688217163, "rewards_train/1-l": -2.69722843170166, "rewards_train/1-w": 3.275763988494873, "rewards_train/2-2": 3.588942527770996, "rewards_train/2-w": 1.5239627361297607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.972992420196533, "rewards_train/margins_1": 0.69706130027771, "rewards_train/margins_2": 2.0649797916412354, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -156.0124969482422, "logps_train/policy_1_l": -189.985107421875, "logps_train/policy_1_w": -171.1789093017578, "logps_train/policy_2_2": -125.08338165283203, "logps_train/policy_2_w": -214.96365356445312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.5717970132827759, "rewards_train/1-l": -1.8196048736572266, "rewards_train/1-w": 2.939923048019409, "rewards_train/2-2": 2.7545526027679443, "rewards_train/2-w": 1.0606653690338135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.759527921676636, "rewards_train/margins_1": 1.3681260347366333, "rewards_train/margins_2": 1.6938872337341309, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -200.33766174316406, "logps_train/policy_1_l": -130.90625, "logps_train/policy_1_w": -107.69444274902344, "logps_train/policy_2_2": -164.34417724609375, "logps_train/policy_2_w": -137.47471618652344, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8881080150604248, "rewards_train/1-l": -1.4980462789535522, "rewards_train/1-w": 3.1975483894348145, "rewards_train/2-2": 3.5874578952789307, "rewards_train/2-w": 1.7091689109802246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.695594668388367, "rewards_train/margins_1": 1.3094403743743896, "rewards_train/margins_2": 1.878288984298706, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -128.0467987060547, "logps_train/policy_1_l": -127.748779296875, "logps_train/policy_1_w": -77.11014556884766, "logps_train/policy_2_2": -94.35420227050781, "logps_train/policy_2_w": -110.07500457763672, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.5187578201293945, "rewards_train/1-l": -1.8709723949432373, "rewards_train/1-w": 2.4456257820129395, "rewards_train/2-2": 2.913017749786377, "rewards_train/2-w": 1.1065618991851807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.316598176956177, "rewards_train/margins_1": 0.9268679618835449, "rewards_train/margins_2": 1.8064558506011963, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -153.2273712158203, "logps_train/policy_1_l": -201.7615966796875, "logps_train/policy_1_w": -106.86597442626953, "logps_train/policy_2_2": -121.63214111328125, "logps_train/policy_2_w": -136.815185546875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.9678874015808105, "rewards_train/1-l": -2.754283905029297, "rewards_train/1-w": 3.16652774810791, "rewards_train/2-2": 3.057098388671875, "rewards_train/2-w": 2.1005115509033203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.920811653137207, "rewards_train/margins_1": 1.1986403465270996, "rewards_train/margins_2": 0.9565868377685547, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -187.0879669189453, "logps_train/policy_1_l": -226.03076171875, "logps_train/policy_1_w": -120.91094970703125, "logps_train/policy_2_2": -143.95095825195312, "logps_train/policy_2_w": -166.78814697265625, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 2.2412033081054688, "rewards_train/1-l": -2.7343263626098633, "rewards_train/1-w": 3.4495296478271484, "rewards_train/2-2": 3.692404270172119, "rewards_train/2-w": 1.8711857795715332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.183856010437012, "rewards_train/margins_1": 1.2083263397216797, "rewards_train/margins_2": 1.821218490600586, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -143.47918701171875, "logps_train/policy_1_l": -128.54483032226562, "logps_train/policy_1_w": -118.65929412841797, "logps_train/policy_2_2": -118.74433898925781, "logps_train/policy_2_w": -139.942138671875, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.198956251144409, "rewards_train/1-l": -2.076357364654541, "rewards_train/1-w": 2.8590705394744873, "rewards_train/2-2": 3.353692054748535, "rewards_train/2-w": 2.00266170501709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.935427904129028, "rewards_train/margins_1": 0.6601142883300781, "rewards_train/margins_2": 1.3510303497314453, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -134.43614196777344, "logps_train/policy_1_l": -123.39682006835938, "logps_train/policy_1_w": -66.30328369140625, "logps_train/policy_2_2": -83.74199676513672, "logps_train/policy_2_w": -93.77526092529297, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 0.21107268333435059, "rewards_train/1-l": -1.388704538345337, "rewards_train/1-w": 2.2450623512268066, "rewards_train/2-2": 2.2929887771606445, "rewards_train/2-w": 1.292394995689392, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6337668895721436, "rewards_train/margins_1": 2.033989667892456, "rewards_train/margins_2": 1.0005937814712524, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -100.42625427246094, "logps_train/policy_1_l": -136.53155517578125, "logps_train/policy_1_w": -110.11905670166016, "logps_train/policy_2_2": -82.22480773925781, "logps_train/policy_2_w": -135.2681884765625, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.4157483577728271, "rewards_train/1-l": -2.292633295059204, "rewards_train/1-w": 3.0302813053131104, "rewards_train/2-2": 1.7703908681869507, "rewards_train/2-w": 1.3888063430786133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.3229146003723145, "rewards_train/margins_1": 1.6145329475402832, "rewards_train/margins_2": 0.3815845251083374, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -86.40021514892578, "logps_train/policy_1_l": -79.425048828125, "logps_train/policy_1_w": -96.67149353027344, "logps_train/policy_2_2": -62.09913635253906, "logps_train/policy_2_w": -133.77761840820312, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.5099788904190063, "rewards_train/1-l": -0.9221929311752319, "rewards_train/1-w": 2.8211312294006348, "rewards_train/2-2": 2.516648769378662, "rewards_train/2-w": 1.4316129684448242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7433241605758667, "rewards_train/margins_1": 1.3111523389816284, "rewards_train/margins_2": 1.085035800933838, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -120.22587585449219, "logps_train/policy_1_l": -113.65969848632812, "logps_train/policy_1_w": -91.51773071289062, "logps_train/policy_2_2": -86.69719696044922, "logps_train/policy_2_w": -120.65071868896484, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.9383495450019836, "rewards_train/1-l": -2.0926289558410645, "rewards_train/1-w": 2.9400243759155273, "rewards_train/2-2": 2.622467279434204, "rewards_train/2-w": 1.4943031072616577, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.032653331756592, "rewards_train/margins_1": 2.0016748309135437, "rewards_train/margins_2": 1.1281641721725464, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -172.42431640625, "logps_train/policy_1_l": -198.24325561523438, "logps_train/policy_1_w": -122.66572570800781, "logps_train/policy_2_2": -135.919677734375, "logps_train/policy_2_w": -165.80331420898438, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.0653806924819946, "rewards_train/1-l": -2.4239354133605957, "rewards_train/1-w": 3.683426856994629, "rewards_train/2-2": 2.5888919830322266, "rewards_train/2-w": 1.9446684122085571, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.107362270355225, "rewards_train/margins_1": 2.6180461645126343, "rewards_train/margins_2": 0.6442235708236694, "step": 485 }, { "epoch": 1.46, "learning_rate": 9.634426239953074e-07, "loss": 0.3689, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -163.0433807373047, "logps_train/policy_1_l": -191.4950408935547, "logps_train/policy_1_w": -175.47451782226562, "logps_train/policy_2_2": -140.58566284179688, "logps_train/policy_2_w": -207.95376586914062, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 2.730037212371826, "rewards_train/1-l": -1.8237228393554688, "rewards_train/1-w": 4.218172550201416, "rewards_train/2-2": 3.651003122329712, "rewards_train/2-w": 2.673372268676758, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.041895389556885, "rewards_train/margins_1": 1.4881353378295898, "rewards_train/margins_2": 0.9776308536529541, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -95.85540008544922, "logps_train/policy_1_l": -151.01307678222656, "logps_train/policy_1_w": -87.59870147705078, "logps_train/policy_2_2": -70.90621948242188, "logps_train/policy_2_w": -119.36000061035156, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.153424859046936, "rewards_train/1-l": -2.7921290397644043, "rewards_train/1-w": 1.787785530090332, "rewards_train/2-2": 2.050589084625244, "rewards_train/2-w": 0.5616559386253357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.579914569854736, "rewards_train/margins_1": 0.634360671043396, "rewards_train/margins_2": 1.4889331459999084, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -101.11248779296875, "logps_train/policy_1_l": -144.3188934326172, "logps_train/policy_1_w": -39.318572998046875, "logps_train/policy_2_2": -74.48313903808594, "logps_train/policy_2_w": -55.685691833496094, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -57.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -66.5, "rewards_train/1-2": 1.0215635299682617, "rewards_train/1-l": -2.2354044914245605, "rewards_train/1-w": 1.7745883464813232, "rewards_train/2-2": 2.1548104286193848, "rewards_train/2-w": 1.0841652154922485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.009992837905884, "rewards_train/margins_1": 0.7530248165130615, "rewards_train/margins_2": 1.0706452131271362, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -94.57745361328125, "logps_train/policy_1_l": -140.30746459960938, "logps_train/policy_1_w": -90.3668441772461, "logps_train/policy_2_2": -74.30986785888672, "logps_train/policy_2_w": -116.26974487304688, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.5156919956207275, "rewards_train/1-l": -2.283383846282959, "rewards_train/1-w": 2.713315725326538, "rewards_train/2-2": 1.9987008571624756, "rewards_train/2-w": 1.365213394165039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.996699571609497, "rewards_train/margins_1": 1.1976237297058105, "rewards_train/margins_2": 0.6334874629974365, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -165.2479248046875, "logps_train/policy_1_l": -217.35667419433594, "logps_train/policy_1_w": -120.62164306640625, "logps_train/policy_2_2": -112.74998474121094, "logps_train/policy_2_w": -183.36544799804688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.3095824718475342, "rewards_train/1-l": -2.5419180393218994, "rewards_train/1-w": 4.606585502624512, "rewards_train/2-2": 3.1656267642974854, "rewards_train/2-w": 2.3040809631347656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.148503541946411, "rewards_train/margins_1": 3.2970030307769775, "rewards_train/margins_2": 0.8615458011627197, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -191.92916870117188, "logps_train/policy_1_l": -176.09942626953125, "logps_train/policy_1_w": -145.43743896484375, "logps_train/policy_2_2": -142.64691162109375, "logps_train/policy_2_w": -187.25042724609375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.171144723892212, "rewards_train/1-l": -1.5439265966415405, "rewards_train/1-w": 3.9304747581481934, "rewards_train/2-2": 4.030620574951172, "rewards_train/2-w": 2.4601149559020996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.474401354789734, "rewards_train/margins_1": 1.7593300342559814, "rewards_train/margins_2": 1.5705056190490723, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -205.8731689453125, "logps_train/policy_1_l": -284.37445068359375, "logps_train/policy_1_w": -125.03620147705078, "logps_train/policy_2_2": -171.1694793701172, "logps_train/policy_2_w": -165.9869384765625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -252.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.2283084392547607, "rewards_train/1-l": -3.268692970275879, "rewards_train/1-w": 3.463566541671753, "rewards_train/2-2": 3.6361770629882812, "rewards_train/2-w": 1.9763052463531494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.732259511947632, "rewards_train/margins_1": 1.2352581024169922, "rewards_train/margins_2": 1.6598718166351318, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -130.4317626953125, "logps_train/policy_1_l": -196.20855712890625, "logps_train/policy_1_w": -103.91812896728516, "logps_train/policy_2_2": -103.17804718017578, "logps_train/policy_2_w": -127.59111022949219, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 2.2845592498779297, "rewards_train/1-l": -2.505230665206909, "rewards_train/1-w": 3.3443198204040527, "rewards_train/2-2": 3.2778987884521484, "rewards_train/2-w": 2.297138214111328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.849550485610962, "rewards_train/margins_1": 1.059760570526123, "rewards_train/margins_2": 0.9807605743408203, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -216.4876251220703, "logps_train/policy_1_l": -225.601318359375, "logps_train/policy_1_w": -149.84207153320312, "logps_train/policy_2_2": -187.26620483398438, "logps_train/policy_2_w": -179.64877319335938, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.765300750732422, "rewards_train/1-l": -3.4433341026306152, "rewards_train/1-w": 3.7032933235168457, "rewards_train/2-2": 4.256193161010742, "rewards_train/2-w": 2.6038737297058105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.146627426147461, "rewards_train/margins_1": 0.9379925727844238, "rewards_train/margins_2": 1.6523194313049316, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -201.57640075683594, "logps_train/policy_1_l": -153.33724975585938, "logps_train/policy_1_w": -100.97346496582031, "logps_train/policy_2_2": -152.24081420898438, "logps_train/policy_2_w": -133.5896453857422, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.4568126201629639, "rewards_train/1-l": -2.465951442718506, "rewards_train/1-w": 2.470036506652832, "rewards_train/2-2": 3.3921289443969727, "rewards_train/2-w": 1.0000202655792236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.935987949371338, "rewards_train/margins_1": 1.0132238864898682, "rewards_train/margins_2": 2.392108678817749, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -89.4083480834961, "logps_train/policy_1_l": -141.1602783203125, "logps_train/policy_1_w": -106.70567321777344, "logps_train/policy_2_2": -70.06114196777344, "logps_train/policy_2_w": -132.61155700683594, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.615806221961975, "rewards_train/1-l": -2.0906620025634766, "rewards_train/1-w": 2.9466207027435303, "rewards_train/2-2": 2.1876354217529297, "rewards_train/2-w": 1.9247822761535645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 5.037282705307007, "rewards_train/margins_1": 1.3308144807815552, "rewards_train/margins_2": 0.26285314559936523, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -203.22000122070312, "logps_train/policy_1_l": -162.06881713867188, "logps_train/policy_1_w": -89.29878234863281, "logps_train/policy_2_2": -164.94400024414062, "logps_train/policy_2_w": -116.87802124023438, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.2928431034088135, "rewards_train/1-l": -2.0895230770111084, "rewards_train/1-w": 3.410747528076172, "rewards_train/2-2": 3.070443630218506, "rewards_train/2-w": 2.4684481620788574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.50027060508728, "rewards_train/margins_1": 2.1179044246673584, "rewards_train/margins_2": 0.6019954681396484, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -159.40530395507812, "logps_train/policy_1_l": -158.97634887695312, "logps_train/policy_1_w": -131.96754455566406, "logps_train/policy_2_2": -133.64315795898438, "logps_train/policy_2_w": -148.25790405273438, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.7266569137573242, "rewards_train/1-l": -2.137479066848755, "rewards_train/1-w": 2.6938700675964355, "rewards_train/2-2": 3.023183822631836, "rewards_train/2-w": 2.047647476196289, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.83134913444519, "rewards_train/margins_1": 0.9672131538391113, "rewards_train/margins_2": 0.9755363464355469, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -218.62478637695312, "logps_train/policy_1_l": -222.91896057128906, "logps_train/policy_1_w": -130.70309448242188, "logps_train/policy_2_2": -167.58773803710938, "logps_train/policy_2_w": -175.128662109375, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.200021505355835, "rewards_train/1-l": -2.983692169189453, "rewards_train/1-w": 3.2918004989624023, "rewards_train/2-2": 3.931851387023926, "rewards_train/2-w": 1.5371334552764893, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.2754926681518555, "rewards_train/margins_1": 1.0917789936065674, "rewards_train/margins_2": 2.3947179317474365, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -105.82615661621094, "logps_train/policy_1_l": -119.56369018554688, "logps_train/policy_1_w": -119.26541137695312, "logps_train/policy_2_2": -79.65303039550781, "logps_train/policy_2_w": -160.05667114257812, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.1136739253997803, "rewards_train/1-l": -1.4507050514221191, "rewards_train/1-w": 3.8508028984069824, "rewards_train/2-2": 2.1100873947143555, "rewards_train/2-w": 1.4943327903747559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.301507949829102, "rewards_train/margins_1": 2.737128973007202, "rewards_train/margins_2": 0.6157546043395996, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -77.24501037597656, "logps_train/policy_1_l": -109.66686248779297, "logps_train/policy_1_w": -137.27902221679688, "logps_train/policy_2_2": -61.55399703979492, "logps_train/policy_2_w": -176.85711669921875, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2848742008209229, "rewards_train/1-l": -1.766686201095581, "rewards_train/1-w": 3.8447537422180176, "rewards_train/2-2": 1.8938190937042236, "rewards_train/2-w": 1.693976879119873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.611439943313599, "rewards_train/margins_1": 2.5598795413970947, "rewards_train/margins_2": 0.19984221458435059, "step": 487 }, { "epoch": 1.46, "learning_rate": 9.440355778282107e-07, "loss": 0.4168, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -183.64907836914062, "logps_train/policy_1_l": -241.7583465576172, "logps_train/policy_1_w": -143.45635986328125, "logps_train/policy_2_2": -143.33538818359375, "logps_train/policy_2_w": -182.35328674316406, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.8804044723510742, "rewards_train/1-l": -1.5109914541244507, "rewards_train/1-w": 3.328582525253296, "rewards_train/2-2": 3.1203675270080566, "rewards_train/2-w": 1.6537340879440308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.839573979377747, "rewards_train/margins_1": 1.4481780529022217, "rewards_train/margins_2": 1.4666334390640259, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -227.62106323242188, "logps_train/policy_1_l": -202.79647827148438, "logps_train/policy_1_w": -116.51819610595703, "logps_train/policy_2_2": -188.2762451171875, "logps_train/policy_2_w": -162.1703338623047, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.4308624267578125, "rewards_train/1-l": -3.0312089920043945, "rewards_train/1-w": 3.5028679370880127, "rewards_train/2-2": 3.2395620346069336, "rewards_train/2-w": 2.034529209136963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.534076929092407, "rewards_train/margins_1": 2.0720055103302, "rewards_train/margins_2": 1.2050328254699707, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -164.10142517089844, "logps_train/policy_1_l": -192.49893188476562, "logps_train/policy_1_w": -130.04595947265625, "logps_train/policy_2_2": -131.23626708984375, "logps_train/policy_2_w": -171.7342529296875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.8257948160171509, "rewards_train/1-l": -2.2033605575561523, "rewards_train/1-w": 3.4379825592041016, "rewards_train/2-2": 3.204498291015625, "rewards_train/2-w": 1.7355587482452393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.641343116760254, "rewards_train/margins_1": 1.6121877431869507, "rewards_train/margins_2": 1.4689395427703857, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -220.356201171875, "logps_train/policy_1_l": -160.12310791015625, "logps_train/policy_1_w": -182.55975341796875, "logps_train/policy_2_2": -187.1619873046875, "logps_train/policy_2_w": -220.5818328857422, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.2290289402008057, "rewards_train/1-l": -1.517975091934204, "rewards_train/1-w": 3.6039857864379883, "rewards_train/2-2": 3.546104907989502, "rewards_train/2-w": 1.9244343042373657, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.121960878372192, "rewards_train/margins_1": 1.3749568462371826, "rewards_train/margins_2": 1.6216706037521362, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -174.98355102539062, "logps_train/policy_1_l": -293.60821533203125, "logps_train/policy_1_w": -156.8711395263672, "logps_train/policy_2_2": -138.0952911376953, "logps_train/policy_2_w": -205.68325805664062, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 2.1094584465026855, "rewards_train/1-l": -3.1233224868774414, "rewards_train/1-w": 3.4691367149353027, "rewards_train/2-2": 3.375627040863037, "rewards_train/2-w": 1.8965177536010742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.592459201812744, "rewards_train/margins_1": 1.3596782684326172, "rewards_train/margins_2": 1.479109287261963, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -159.44415283203125, "logps_train/policy_1_l": -131.04066467285156, "logps_train/policy_1_w": -73.03724670410156, "logps_train/policy_2_2": -124.89176940917969, "logps_train/policy_2_w": -96.71672058105469, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.49464595317840576, "rewards_train/1-l": -2.458460807800293, "rewards_train/1-w": 1.5941264629364014, "rewards_train/2-2": 2.0569169521331787, "rewards_train/2-w": 0.6474686861038208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.052587270736694, "rewards_train/margins_1": 1.0994805097579956, "rewards_train/margins_2": 1.409448266029358, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -135.64564514160156, "logps_train/policy_1_l": -120.947021484375, "logps_train/policy_1_w": -130.052490234375, "logps_train/policy_2_2": -99.28927612304688, "logps_train/policy_2_w": -153.77218627929688, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.9213731288909912, "rewards_train/1-l": -1.4861094951629639, "rewards_train/1-w": 3.018773078918457, "rewards_train/2-2": 3.127322196960449, "rewards_train/2-w": 2.007157325744629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.504882574081421, "rewards_train/margins_1": 1.0973999500274658, "rewards_train/margins_2": 1.1201648712158203, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -206.43699645996094, "logps_train/policy_1_l": -184.18507385253906, "logps_train/policy_1_w": -155.50393676757812, "logps_train/policy_2_2": -160.20077514648438, "logps_train/policy_2_w": -204.54539489746094, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 2.157862901687622, "rewards_train/1-l": -1.6845234632492065, "rewards_train/1-w": 4.819918632507324, "rewards_train/2-2": 3.6689844131469727, "rewards_train/2-w": 2.411086082458496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.504442095756531, "rewards_train/margins_1": 2.662055730819702, "rewards_train/margins_2": 1.2578983306884766, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -168.69146728515625, "logps_train/policy_1_l": -171.17315673828125, "logps_train/policy_1_w": -79.25699615478516, "logps_train/policy_2_2": -131.3731689453125, "logps_train/policy_2_w": -112.41879272460938, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.79335355758667, "rewards_train/1-l": -3.4977850914001465, "rewards_train/1-w": 2.760237693786621, "rewards_train/2-2": 3.33768367767334, "rewards_train/2-w": 1.3503081798553467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.258022785186768, "rewards_train/margins_1": 0.9668841361999512, "rewards_train/margins_2": 1.9873754978179932, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -109.68896484375, "logps_train/policy_1_l": -74.28414916992188, "logps_train/policy_1_w": -47.12723922729492, "logps_train/policy_2_2": -85.81549835205078, "logps_train/policy_2_w": -65.13998413085938, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -62.0, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -77.0, "rewards_train/1-2": 0.9889156818389893, "rewards_train/1-l": -1.2116180658340454, "rewards_train/1-w": 2.0075883865356445, "rewards_train/2-2": 1.7762629985809326, "rewards_train/2-w": 1.1563140153884888, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.21920645236969, "rewards_train/margins_1": 1.0186727046966553, "rewards_train/margins_2": 0.6199489831924438, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -141.8596954345703, "logps_train/policy_1_l": -159.28964233398438, "logps_train/policy_1_w": -144.2014923095703, "logps_train/policy_2_2": -118.68789672851562, "logps_train/policy_2_w": -175.56561279296875, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 2.142155647277832, "rewards_train/1-l": -1.2621687650680542, "rewards_train/1-w": 3.417351245880127, "rewards_train/2-2": 2.824960708618164, "rewards_train/2-w": 2.162189483642578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.679520010948181, "rewards_train/margins_1": 1.275195598602295, "rewards_train/margins_2": 0.6627712249755859, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -149.85568237304688, "logps_train/policy_1_l": -133.05580139160156, "logps_train/policy_1_w": -92.06393432617188, "logps_train/policy_2_2": -126.16728973388672, "logps_train/policy_2_w": -115.83284759521484, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.222830057144165, "rewards_train/1-l": -1.7691056728363037, "rewards_train/1-w": 2.768606662750244, "rewards_train/2-2": 2.4582712650299072, "rewards_train/2-w": 1.7831206321716309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.537712335586548, "rewards_train/margins_1": 1.545776605606079, "rewards_train/margins_2": 0.6751506328582764, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -94.64498901367188, "logps_train/policy_1_l": -103.19862365722656, "logps_train/policy_1_w": -123.14340209960938, "logps_train/policy_2_2": -71.88886260986328, "logps_train/policy_2_w": -160.80804443359375, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.72300124168396, "rewards_train/1-l": -1.209658145904541, "rewards_train/1-w": 2.8571937084198, "rewards_train/2-2": 2.2876763343811035, "rewards_train/2-w": 1.2129454612731934, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.066851854324341, "rewards_train/margins_1": 1.1341924667358398, "rewards_train/margins_2": 1.0747308731079102, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -203.18507385253906, "logps_train/policy_1_l": -155.31361389160156, "logps_train/policy_1_w": -120.67105102539062, "logps_train/policy_2_2": -154.43348693847656, "logps_train/policy_2_w": -169.14694213867188, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.8908683061599731, "rewards_train/1-l": -2.299476385116577, "rewards_train/1-w": 3.231966972351074, "rewards_train/2-2": 4.126964569091797, "rewards_train/2-w": 1.6660683155059814, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.531443357467651, "rewards_train/margins_1": 1.341098666191101, "rewards_train/margins_2": 2.4608962535858154, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -171.37379455566406, "logps_train/policy_1_l": -189.1309051513672, "logps_train/policy_1_w": -132.99720764160156, "logps_train/policy_2_2": -130.30038452148438, "logps_train/policy_2_w": -171.92059326171875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.128244638442993, "rewards_train/1-l": -2.8668019771575928, "rewards_train/1-w": 3.939342498779297, "rewards_train/2-2": 3.8980865478515625, "rewards_train/2-w": 2.3790335655212402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.80614447593689, "rewards_train/margins_1": 1.8110978603363037, "rewards_train/margins_2": 1.5190529823303223, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -114.084716796875, "logps_train/policy_1_l": -133.4470977783203, "logps_train/policy_1_w": -129.31753540039062, "logps_train/policy_2_2": -88.1156234741211, "logps_train/policy_2_w": -177.60946655273438, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.7052007913589478, "rewards_train/1-l": -1.3088698387145996, "rewards_train/1-w": 2.5284018516540527, "rewards_train/2-2": 2.5247652530670166, "rewards_train/2-w": 0.35858482122421265, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8372716903686523, "rewards_train/margins_1": 0.823201060295105, "rewards_train/margins_2": 2.166180431842804, "step": 489 }, { "epoch": 1.47, "learning_rate": 9.247803910457226e-07, "loss": 0.3908, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -140.49801635742188, "logps_train/policy_1_l": -142.6754150390625, "logps_train/policy_1_w": -113.00711822509766, "logps_train/policy_2_2": -107.71176147460938, "logps_train/policy_2_w": -158.29434204101562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.3552758693695068, "rewards_train/1-l": -1.8980114459991455, "rewards_train/1-w": 3.1854209899902344, "rewards_train/2-2": 2.983315944671631, "rewards_train/2-w": 1.1678321361541748, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.08343243598938, "rewards_train/margins_1": 1.8301451206207275, "rewards_train/margins_2": 1.815483808517456, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -167.41134643554688, "logps_train/policy_1_l": -161.03817749023438, "logps_train/policy_1_w": -90.25071716308594, "logps_train/policy_2_2": -125.45548248291016, "logps_train/policy_2_w": -134.9225616455078, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.3432406187057495, "rewards_train/1-l": -3.296200752258301, "rewards_train/1-w": 3.274928092956543, "rewards_train/2-2": 3.257577419281006, "rewards_train/2-w": 1.5108689069747925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.571128845214844, "rewards_train/margins_1": 1.9316874742507935, "rewards_train/margins_2": 1.7467085123062134, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -146.9671630859375, "logps_train/policy_1_l": -145.05685424804688, "logps_train/policy_1_w": -104.06690216064453, "logps_train/policy_2_2": -94.9908447265625, "logps_train/policy_2_w": -162.88385009765625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.6759392023086548, "rewards_train/1-l": -1.3595912456512451, "rewards_train/1-w": 3.0647945404052734, "rewards_train/2-2": 2.9368536472320557, "rewards_train/2-w": 1.3577089309692383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4243857860565186, "rewards_train/margins_1": 1.3888553380966187, "rewards_train/margins_2": 1.5791447162628174, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -178.5260772705078, "logps_train/policy_1_l": -190.6450653076172, "logps_train/policy_1_w": -117.49476623535156, "logps_train/policy_2_2": -133.85342407226562, "logps_train/policy_2_w": -174.7740478515625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.9723923206329346, "rewards_train/1-l": -2.544194459915161, "rewards_train/1-w": 3.425523519515991, "rewards_train/2-2": 3.5490329265594482, "rewards_train/2-w": 1.9663453102111816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.969717979431152, "rewards_train/margins_1": 1.4531311988830566, "rewards_train/margins_2": 1.5826876163482666, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -217.38137817382812, "logps_train/policy_1_l": -192.01168823242188, "logps_train/policy_1_w": -93.74591064453125, "logps_train/policy_2_2": -176.20672607421875, "logps_train/policy_2_w": -121.995361328125, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1126435995101929, "rewards_train/1-l": -2.6152303218841553, "rewards_train/1-w": 2.86525297164917, "rewards_train/2-2": 3.038703441619873, "rewards_train/2-w": 1.8574950695037842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.480483293533325, "rewards_train/margins_1": 1.752609372138977, "rewards_train/margins_2": 1.1812083721160889, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -135.15721130371094, "logps_train/policy_1_l": -158.06338500976562, "logps_train/policy_1_w": -150.58969116210938, "logps_train/policy_2_2": -105.98069763183594, "logps_train/policy_2_w": -190.72943115234375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.792482614517212, "rewards_train/1-l": -1.720205545425415, "rewards_train/1-w": 3.757437229156494, "rewards_train/2-2": 2.8062281608581543, "rewards_train/2-w": 2.2231502532958984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.477642774581909, "rewards_train/margins_1": 1.9649546146392822, "rewards_train/margins_2": 0.5830779075622559, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -137.70066833496094, "logps_train/policy_1_l": -123.68589782714844, "logps_train/policy_1_w": -128.2752685546875, "logps_train/policy_2_2": -117.95571899414062, "logps_train/policy_2_w": -160.09393310546875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.502198338508606, "rewards_train/1-l": -0.4631209969520569, "rewards_train/1-w": 3.137317180633545, "rewards_train/2-2": 2.459897041320801, "rewards_train/2-w": 2.0523250102996826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.600438177585602, "rewards_train/margins_1": 1.635118842124939, "rewards_train/margins_2": 0.40757203102111816, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -156.47923278808594, "logps_train/policy_1_l": -95.28528594970703, "logps_train/policy_1_w": -99.73922729492188, "logps_train/policy_2_2": -112.42369079589844, "logps_train/policy_2_w": -140.03570556640625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.8255144953727722, "rewards_train/1-l": -1.161878228187561, "rewards_train/1-w": 3.315920352935791, "rewards_train/2-2": 2.6638803482055664, "rewards_train/2-w": 1.529241919517517, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.477798581123352, "rewards_train/margins_1": 2.490405857563019, "rewards_train/margins_2": 1.1346384286880493, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -197.75457763671875, "logps_train/policy_1_l": -118.37336730957031, "logps_train/policy_1_w": -128.13389587402344, "logps_train/policy_2_2": -150.60031127929688, "logps_train/policy_2_w": -171.45785522460938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.105792760848999, "rewards_train/1-l": -1.110213041305542, "rewards_train/1-w": 3.4995014667510986, "rewards_train/2-2": 3.1735620498657227, "rewards_train/2-w": 1.9577302932739258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.609714508056641, "rewards_train/margins_1": 2.3937087059020996, "rewards_train/margins_2": 1.2158317565917969, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -138.84365844726562, "logps_train/policy_1_l": -242.03257751464844, "logps_train/policy_1_w": -154.1062774658203, "logps_train/policy_2_2": -108.53907012939453, "logps_train/policy_2_w": -210.93328857421875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 2.337508201599121, "rewards_train/1-l": -1.9970077276229858, "rewards_train/1-w": 4.6299967765808105, "rewards_train/2-2": 3.42421817779541, "rewards_train/2-w": 2.050421714782715, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.627004504203796, "rewards_train/margins_1": 2.2924885749816895, "rewards_train/margins_2": 1.3737964630126953, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -186.17059326171875, "logps_train/policy_1_l": -133.70111083984375, "logps_train/policy_1_w": -79.05159759521484, "logps_train/policy_2_2": -158.94496154785156, "logps_train/policy_2_w": -98.75399780273438, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": 1.8095039129257202, "rewards_train/1-l": -1.6185489892959595, "rewards_train/1-w": 2.2085118293762207, "rewards_train/2-2": 2.897691011428833, "rewards_train/2-w": 1.4706947803497314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.82706081867218, "rewards_train/margins_1": 0.3990079164505005, "rewards_train/margins_2": 1.4269962310791016, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -163.3341064453125, "logps_train/policy_1_l": -222.2186737060547, "logps_train/policy_1_w": -105.57980346679688, "logps_train/policy_2_2": -135.49493408203125, "logps_train/policy_2_w": -126.67872619628906, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.4158070087432861, "rewards_train/1-l": -2.8240160942077637, "rewards_train/1-w": 3.0783474445343018, "rewards_train/2-2": 2.6118342876434326, "rewards_train/2-w": 2.039940357208252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.902363538742065, "rewards_train/margins_1": 1.6625404357910156, "rewards_train/margins_2": 0.5718939304351807, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -128.30123901367188, "logps_train/policy_1_l": -135.5535888671875, "logps_train/policy_1_w": -114.36054992675781, "logps_train/policy_2_2": -88.41595458984375, "logps_train/policy_2_w": -154.50234985351562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.9651877284049988, "rewards_train/1-l": -2.1147329807281494, "rewards_train/1-w": 2.800663948059082, "rewards_train/2-2": 2.8419981002807617, "rewards_train/2-w": 1.0388281345367432, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9153969287872314, "rewards_train/margins_1": 1.8354762196540833, "rewards_train/margins_2": 1.8031699657440186, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -192.9615020751953, "logps_train/policy_1_l": -210.6092529296875, "logps_train/policy_1_w": -213.08702087402344, "logps_train/policy_2_2": -152.4111328125, "logps_train/policy_2_w": -269.7718505859375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.227287769317627, "rewards_train/1-l": -1.6109248399734497, "rewards_train/1-w": 3.903797149658203, "rewards_train/2-2": 3.88232421875, "rewards_train/2-w": 1.2985939979553223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.514721989631653, "rewards_train/margins_1": 1.6765093803405762, "rewards_train/margins_2": 2.5837302207946777, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -117.88282775878906, "logps_train/policy_1_l": -102.76553344726562, "logps_train/policy_1_w": -101.25942993164062, "logps_train/policy_2_2": -91.77111053466797, "logps_train/policy_2_w": -137.05813598632812, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.0023422241210938, "rewards_train/1-l": -1.9249911308288574, "rewards_train/1-w": 3.2404637336730957, "rewards_train/2-2": 2.2478885650634766, "rewards_train/2-w": 1.866062879562378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.165454864501953, "rewards_train/margins_1": 2.238121509552002, "rewards_train/margins_2": 0.38182568550109863, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -216.96188354492188, "logps_train/policy_1_l": -166.29092407226562, "logps_train/policy_1_w": -110.89166259765625, "logps_train/policy_2_2": -171.94432067871094, "logps_train/policy_2_w": -147.29714965820312, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.9397501945495605, "rewards_train/1-l": -1.9165934324264526, "rewards_train/1-w": 3.5223567485809326, "rewards_train/2-2": 3.896193027496338, "rewards_train/2-w": 2.1909875869750977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.438950181007385, "rewards_train/margins_1": 1.582606554031372, "rewards_train/margins_2": 1.7052054405212402, "step": 491 }, { "epoch": 1.47, "learning_rate": 9.05678942920127e-07, "loss": 0.3859, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -136.54132080078125, "logps_train/policy_1_l": -129.2443084716797, "logps_train/policy_1_w": -94.65217590332031, "logps_train/policy_2_2": -101.49866485595703, "logps_train/policy_2_w": -124.23312377929688, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.8232109546661377, "rewards_train/1-l": -2.009977102279663, "rewards_train/1-w": 2.8340015411376953, "rewards_train/2-2": 3.087437868118286, "rewards_train/2-w": 1.7399696111679077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.843978643417358, "rewards_train/margins_1": 1.0107905864715576, "rewards_train/margins_2": 1.3474682569503784, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -201.97317504882812, "logps_train/policy_1_l": -160.74334716796875, "logps_train/policy_1_w": -91.80979919433594, "logps_train/policy_2_2": -146.1483917236328, "logps_train/policy_2_w": -145.56222534179688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.25268247723579407, "rewards_train/1-l": -2.3290224075317383, "rewards_train/1-w": 2.7682385444641113, "rewards_train/2-2": 2.6382851600646973, "rewards_train/2-w": 1.0640902519226074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.09726095199585, "rewards_train/margins_1": 2.5155560672283173, "rewards_train/margins_2": 1.5741949081420898, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -189.6038818359375, "logps_train/policy_1_l": -143.8201141357422, "logps_train/policy_1_w": -99.16559600830078, "logps_train/policy_2_2": -137.3661651611328, "logps_train/policy_2_w": -143.40597534179688, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.2411746978759766, "rewards_train/1-l": -2.1225268840789795, "rewards_train/1-w": 3.042534351348877, "rewards_train/2-2": 3.157132625579834, "rewards_train/2-w": 1.5617467164993286, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.1650612354278564, "rewards_train/margins_1": 1.8013596534729004, "rewards_train/margins_2": 1.5953859090805054, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -193.68255615234375, "logps_train/policy_1_l": -179.10577392578125, "logps_train/policy_1_w": -127.37486267089844, "logps_train/policy_2_2": -173.67408752441406, "logps_train/policy_2_w": -154.96347045898438, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 2.407525062561035, "rewards_train/1-l": -2.0308895111083984, "rewards_train/1-w": 3.3453259468078613, "rewards_train/2-2": 3.4599344730377197, "rewards_train/2-w": 2.397402286529541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.37621545791626, "rewards_train/margins_1": 0.9378008842468262, "rewards_train/margins_2": 1.0625321865081787, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -110.98599243164062, "logps_train/policy_1_l": -76.47862243652344, "logps_train/policy_1_w": -88.6351318359375, "logps_train/policy_2_2": -83.31678771972656, "logps_train/policy_2_w": -114.4809799194336, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.8584321737289429, "rewards_train/1-l": -1.4810163974761963, "rewards_train/1-w": 2.47672176361084, "rewards_train/2-2": 2.2249624729156494, "rewards_train/2-w": 1.1815893650054932, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.957738161087036, "rewards_train/margins_1": 1.618289589881897, "rewards_train/margins_2": 1.0433731079101562, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -94.16487884521484, "logps_train/policy_1_l": -225.37840270996094, "logps_train/policy_1_w": -62.40687561035156, "logps_train/policy_2_2": -70.77481079101562, "logps_train/policy_2_w": -100.67364501953125, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 1.1585121154785156, "rewards_train/1-l": -4.197214603424072, "rewards_train/1-w": 2.084312915802002, "rewards_train/2-2": 2.3053317070007324, "rewards_train/2-w": 1.1021676063537598, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.281527519226074, "rewards_train/margins_1": 0.9258008003234863, "rewards_train/margins_2": 1.2031641006469727, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -94.78884887695312, "logps_train/policy_1_l": -166.86148071289062, "logps_train/policy_1_w": -166.32447814941406, "logps_train/policy_2_2": -69.62562561035156, "logps_train/policy_2_w": -226.94467163085938, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.6304905414581299, "rewards_train/1-l": -2.3955235481262207, "rewards_train/1-w": 3.7784905433654785, "rewards_train/2-2": 2.5163440704345703, "rewards_train/2-w": 0.9211568236351013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.174014091491699, "rewards_train/margins_1": 2.1480000019073486, "rewards_train/margins_2": 1.595187246799469, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -117.81886291503906, "logps_train/policy_1_l": -102.4878921508789, "logps_train/policy_1_w": -84.91311645507812, "logps_train/policy_2_2": -88.74793243408203, "logps_train/policy_2_w": -116.27340698242188, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.9688950777053833, "rewards_train/1-l": -1.9372661113739014, "rewards_train/1-w": 2.607907772064209, "rewards_train/2-2": 2.353332042694092, "rewards_train/2-w": 1.3265652656555176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.54517388343811, "rewards_train/margins_1": 1.6390126943588257, "rewards_train/margins_2": 1.0267667770385742, "step": 492 }, { "epoch": 1.48, "logps_train/policy_1_2": -208.12274169921875, "logps_train/policy_1_l": -208.3448486328125, "logps_train/policy_1_w": -128.2179412841797, "logps_train/policy_2_2": -156.42747497558594, "logps_train/policy_2_w": -176.5804443359375, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6463196277618408, "rewards_train/1-l": -2.924327850341797, "rewards_train/1-w": 3.950861930847168, "rewards_train/2-2": 3.4431910514831543, "rewards_train/2-w": 2.0732052326202393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.875189781188965, "rewards_train/margins_1": 2.304542303085327, "rewards_train/margins_2": 1.369985818862915, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -342.49749755859375, "logps_train/policy_1_l": -238.13172912597656, "logps_train/policy_1_w": -222.22109985351562, "logps_train/policy_2_2": -261.8460998535156, "logps_train/policy_2_w": -300.388671875, "logps_train/ref_1_2": -362.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -314.0, "logps_train/ref_2_w": -314.0, "rewards_train/1-2": 1.9627485275268555, "rewards_train/1-l": -1.9975477457046509, "rewards_train/1-w": 4.6966400146484375, "rewards_train/2-2": 5.215389251708984, "rewards_train/2-w": 1.3455085754394531, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.694187760353088, "rewards_train/margins_1": 2.733891487121582, "rewards_train/margins_2": 3.8698806762695312, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -162.8154296875, "logps_train/policy_1_l": -164.6730194091797, "logps_train/policy_1_w": -91.24732971191406, "logps_train/policy_2_2": -115.4187240600586, "logps_train/policy_2_w": -127.33392333984375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.6184571385383606, "rewards_train/1-l": -1.678629994392395, "rewards_train/1-w": 2.628929615020752, "rewards_train/2-2": 2.782835006713867, "rewards_train/2-w": 1.0556707382202148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.307559609413147, "rewards_train/margins_1": 2.0104724764823914, "rewards_train/margins_2": 1.7271642684936523, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -182.80126953125, "logps_train/policy_1_l": -151.04983520507812, "logps_train/policy_1_w": -109.80055236816406, "logps_train/policy_2_2": -136.02825927734375, "logps_train/policy_2_w": -151.28018188476562, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.4776849746704102, "rewards_train/1-l": -1.6342809200286865, "rewards_train/1-w": 2.958031177520752, "rewards_train/2-2": 3.123737335205078, "rewards_train/2-w": 1.2946381568908691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5923120975494385, "rewards_train/margins_1": 1.4803462028503418, "rewards_train/margins_2": 1.829099178314209, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -129.56602478027344, "logps_train/policy_1_l": -200.29190063476562, "logps_train/policy_1_w": -115.25463104248047, "logps_train/policy_2_2": -94.28410339355469, "logps_train/policy_2_w": -152.15017700195312, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.138709545135498, "rewards_train/1-l": -2.6801676750183105, "rewards_train/1-w": 3.176100254058838, "rewards_train/2-2": 2.3950271606445312, "rewards_train/2-w": 1.5693581104278564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.856267929077148, "rewards_train/margins_1": 2.03739070892334, "rewards_train/margins_2": 0.8256690502166748, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -123.34822082519531, "logps_train/policy_1_l": -235.96926879882812, "logps_train/policy_1_w": -165.77003479003906, "logps_train/policy_2_2": -82.813232421875, "logps_train/policy_2_w": -227.3106689453125, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.3690838813781738, "rewards_train/1-l": -3.8514199256896973, "rewards_train/1-w": 3.454636573791504, "rewards_train/2-2": 2.4510982036590576, "rewards_train/2-w": 1.274010181427002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.306056499481201, "rewards_train/margins_1": 2.08555269241333, "rewards_train/margins_2": 1.1770880222320557, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -164.14041137695312, "logps_train/policy_1_l": -62.86756896972656, "logps_train/policy_1_w": -67.8844223022461, "logps_train/policy_2_2": -117.845703125, "logps_train/policy_2_w": -96.40227508544922, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -51.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.8750205039978027, "rewards_train/1-l": -1.1951552629470825, "rewards_train/1-w": 3.283432722091675, "rewards_train/2-2": 2.925585985183716, "rewards_train/2-w": 2.150397300720215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.478587985038757, "rewards_train/margins_1": 2.408412218093872, "rewards_train/margins_2": 0.775188684463501, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -169.8642578125, "logps_train/policy_1_l": -232.18475341796875, "logps_train/policy_1_w": -196.98486328125, "logps_train/policy_2_2": -126.89878845214844, "logps_train/policy_2_w": -251.35626220703125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.27685546875, "rewards_train/1-l": -2.0122270584106445, "rewards_train/1-w": 4.391357421875, "rewards_train/2-2": 3.8890278339385986, "rewards_train/2-w": 2.029216766357422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.4035844802856445, "rewards_train/margins_1": 2.114501953125, "rewards_train/margins_2": 1.8598110675811768, "step": 493 }, { "epoch": 1.48, "learning_rate": 8.867330977190877e-07, "loss": 0.3783, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -183.58633422851562, "logps_train/policy_1_l": -162.03643798828125, "logps_train/policy_1_w": -146.76568603515625, "logps_train/policy_2_2": -152.6686553955078, "logps_train/policy_2_w": -192.68434143066406, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.194491386413574, "rewards_train/1-l": -1.9286441802978516, "rewards_train/1-w": 3.5828065872192383, "rewards_train/2-2": 3.3206350803375244, "rewards_train/2-w": 1.8253158330917358, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.51145076751709, "rewards_train/margins_1": 1.388315200805664, "rewards_train/margins_2": 1.4953192472457886, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -142.11224365234375, "logps_train/policy_1_l": -134.0624237060547, "logps_train/policy_1_w": -71.81062316894531, "logps_train/policy_2_2": -97.28622436523438, "logps_train/policy_2_w": -107.18820190429688, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.1809635162353516, "rewards_train/1-l": -2.9126884937286377, "rewards_train/1-w": 2.7447192668914795, "rewards_train/2-2": 3.1698150634765625, "rewards_train/2-w": 1.5874301195144653, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.657407760620117, "rewards_train/margins_1": 1.563755750656128, "rewards_train/margins_2": 1.5823849439620972, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -130.65267944335938, "logps_train/policy_1_l": -135.0519561767578, "logps_train/policy_1_w": -110.27291870117188, "logps_train/policy_2_2": -102.10086059570312, "logps_train/policy_2_w": -143.78115844726562, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.2519199848175049, "rewards_train/1-l": -1.5701372623443604, "rewards_train/1-w": 2.4203648567199707, "rewards_train/2-2": 2.332101821899414, "rewards_train/2-w": 0.8780370354652405, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.990502119064331, "rewards_train/margins_1": 1.1684448719024658, "rewards_train/margins_2": 1.4540647864341736, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -159.58053588867188, "logps_train/policy_1_l": -111.84717559814453, "logps_train/policy_1_w": -50.364532470703125, "logps_train/policy_2_2": -121.27400970458984, "logps_train/policy_2_w": -77.53463745117188, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.8263224363327026, "rewards_train/1-l": -1.6429212093353271, "rewards_train/1-w": 1.8498742580413818, "rewards_train/2-2": 2.3429114818573, "rewards_train/2-w": 0.9449734091758728, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.492795467376709, "rewards_train/margins_1": 1.0235518217086792, "rewards_train/margins_2": 1.397938072681427, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -101.94296264648438, "logps_train/policy_1_l": -131.84959411621094, "logps_train/policy_1_w": -117.76268768310547, "logps_train/policy_2_2": -68.45285034179688, "logps_train/policy_2_w": -152.01712036132812, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.193203091621399, "rewards_train/1-l": -2.729881763458252, "rewards_train/1-w": 2.7421395778656006, "rewards_train/2-2": 2.281277656555176, "rewards_train/2-w": 1.0606887340545654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.4720213413238525, "rewards_train/margins_1": 1.5489364862442017, "rewards_train/margins_2": 1.2205889225006104, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -163.66586303710938, "logps_train/policy_1_l": -181.16806030273438, "logps_train/policy_1_w": -155.0494842529297, "logps_train/policy_2_2": -122.63081359863281, "logps_train/policy_2_w": -200.16012573242188, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.5810694694519043, "rewards_train/1-l": -1.4696390628814697, "rewards_train/1-w": 3.316926956176758, "rewards_train/2-2": 2.928715467453003, "rewards_train/2-w": 1.50888991355896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7865660190582275, "rewards_train/margins_1": 1.7358574867248535, "rewards_train/margins_2": 1.419825553894043, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -150.70941162109375, "logps_train/policy_1_l": -146.6387939453125, "logps_train/policy_1_w": -120.32980346679688, "logps_train/policy_2_2": -122.68550109863281, "logps_train/policy_2_w": -157.58816528320312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.9689031839370728, "rewards_train/1-l": -2.3689584732055664, "rewards_train/1-w": 2.8857693672180176, "rewards_train/2-2": 2.083794116973877, "rewards_train/2-w": 1.0849329233169556, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.254727840423584, "rewards_train/margins_1": 1.9168661832809448, "rewards_train/margins_2": 0.9988611936569214, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -143.80654907226562, "logps_train/policy_1_l": -241.35096740722656, "logps_train/policy_1_w": -201.90643310546875, "logps_train/policy_2_2": -116.07774353027344, "logps_train/policy_2_w": -257.8037414550781, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -286.0, "rewards_train/1-2": 1.9791100025177002, "rewards_train/1-l": -2.292127847671509, "rewards_train/1-w": 5.093733310699463, "rewards_train/2-2": 3.124452590942383, "rewards_train/2-w": 2.835251808166504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.385861158370972, "rewards_train/margins_1": 3.1146233081817627, "rewards_train/margins_2": 0.2892007827758789, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -165.38687133789062, "logps_train/policy_1_l": -209.31983947753906, "logps_train/policy_1_w": -175.38824462890625, "logps_train/policy_2_2": -129.70266723632812, "logps_train/policy_2_w": -217.03343200683594, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.9859235286712646, "rewards_train/1-l": -2.071046829223633, "rewards_train/1-w": 3.57094144821167, "rewards_train/2-2": 2.9477012157440186, "rewards_train/2-w": 2.195094585418701, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.641988277435303, "rewards_train/margins_1": 1.5850179195404053, "rewards_train/margins_2": 0.7526066303253174, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -124.92518615722656, "logps_train/policy_1_l": -160.39102172851562, "logps_train/policy_1_w": -150.34487915039062, "logps_train/policy_2_2": -91.80216217041016, "logps_train/policy_2_w": -203.5169677734375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.6574819087982178, "rewards_train/1-l": -2.45316481590271, "rewards_train/1-w": 2.6514487266540527, "rewards_train/2-2": 2.8572838306427, "rewards_train/2-w": 0.8576778173446655, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.104613542556763, "rewards_train/margins_1": 0.993966817855835, "rewards_train/margins_2": 1.9996060132980347, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -143.35134887695312, "logps_train/policy_1_l": -211.36558532714844, "logps_train/policy_1_w": -106.91510009765625, "logps_train/policy_2_2": -103.01371002197266, "logps_train/policy_2_w": -154.0959930419922, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3758032321929932, "rewards_train/1-l": -2.6080431938171387, "rewards_train/1-w": 3.2506775856018066, "rewards_train/2-2": 2.6400351524353027, "rewards_train/2-w": 1.3466507196426392, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.858720779418945, "rewards_train/margins_1": 1.8748743534088135, "rewards_train/margins_2": 1.2933844327926636, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -157.89059448242188, "logps_train/policy_1_l": -174.5897216796875, "logps_train/policy_1_w": -113.6708984375, "logps_train/policy_2_2": -128.0535888671875, "logps_train/policy_2_w": -152.9933624267578, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.5328161716461182, "rewards_train/1-l": -2.7667858600616455, "rewards_train/1-w": 3.5719733238220215, "rewards_train/2-2": 2.757920742034912, "rewards_train/2-w": 2.04206919670105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.338759183883667, "rewards_train/margins_1": 2.0391571521759033, "rewards_train/margins_2": 0.7158515453338623, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -121.9083023071289, "logps_train/policy_1_l": -186.22850036621094, "logps_train/policy_1_w": -190.7542724609375, "logps_train/policy_2_2": -102.06558227539062, "logps_train/policy_2_w": -231.60647583007812, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.9716694355010986, "rewards_train/1-l": -1.7478500604629517, "rewards_train/1-w": 3.3480114936828613, "rewards_train/2-2": 2.705942392349243, "rewards_train/2-w": 1.570602536201477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.095861554145813, "rewards_train/margins_1": 1.3763420581817627, "rewards_train/margins_2": 1.1353398561477661, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -151.21087646484375, "logps_train/policy_1_l": -237.43280029296875, "logps_train/policy_1_w": -124.81346130371094, "logps_train/policy_2_2": -119.16161346435547, "logps_train/policy_2_w": -174.32037353515625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5960988998413086, "rewards_train/1-l": -3.6276559829711914, "rewards_train/1-w": 3.698340892791748, "rewards_train/2-2": 2.840088367462158, "rewards_train/2-w": 1.8367129564285278, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.3259968757629395, "rewards_train/margins_1": 2.1022419929504395, "rewards_train/margins_2": 1.0033754110336304, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -96.68526458740234, "logps_train/policy_1_l": -108.94560241699219, "logps_train/policy_1_w": -103.87348175048828, "logps_train/policy_2_2": -66.02743530273438, "logps_train/policy_2_w": -159.07308959960938, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.7877237200737, "rewards_train/1-l": -1.8215136528015137, "rewards_train/1-w": 3.0798392295837402, "rewards_train/2-2": 1.7597568035125732, "rewards_train/2-w": 1.098159670829773, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.901352882385254, "rewards_train/margins_1": 2.2921155095100403, "rewards_train/margins_2": 0.6615971326828003, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -92.44499206542969, "logps_train/policy_1_l": -106.8441162109375, "logps_train/policy_1_w": -85.47015380859375, "logps_train/policy_2_2": -68.29901885986328, "logps_train/policy_2_w": -117.45464324951172, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.679377794265747, "rewards_train/1-l": -2.309729814529419, "rewards_train/1-w": 2.7561097145080566, "rewards_train/2-2": 2.6772267818450928, "rewards_train/2-w": 1.6475040912628174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.065839529037476, "rewards_train/margins_1": 1.0767319202423096, "rewards_train/margins_2": 1.0297226905822754, "step": 495 }, { "epoch": 1.49, "learning_rate": 8.679447045236964e-07, "loss": 0.3991, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -141.034423828125, "logps_train/policy_1_l": -112.8035888671875, "logps_train/policy_1_w": -73.18111419677734, "logps_train/policy_2_2": -114.35877227783203, "logps_train/policy_2_w": -95.77811431884766, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 1.4793713092803955, "rewards_train/1-l": -1.9387567043304443, "rewards_train/1-w": 2.813138961791992, "rewards_train/2-2": 2.46334171295166, "rewards_train/2-w": 1.5956261157989502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7518956661224365, "rewards_train/margins_1": 1.3337676525115967, "rewards_train/margins_2": 0.86771559715271, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -168.23831176757812, "logps_train/policy_1_l": -212.71563720703125, "logps_train/policy_1_w": -110.6533203125, "logps_train/policy_2_2": -127.7372055053711, "logps_train/policy_2_w": -157.92373657226562, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.69960618019104, "rewards_train/1-l": -3.27586030960083, "rewards_train/1-w": 3.5014655590057373, "rewards_train/2-2": 2.9801857471466064, "rewards_train/2-w": 1.450203776359558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.777325868606567, "rewards_train/margins_1": 1.8018593788146973, "rewards_train/margins_2": 1.5299819707870483, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -120.8139877319336, "logps_train/policy_1_l": -112.72174072265625, "logps_train/policy_1_w": -130.61602783203125, "logps_train/policy_2_2": -87.84746551513672, "logps_train/policy_2_w": -170.79653930664062, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.6420384645462036, "rewards_train/1-l": -0.953424334526062, "rewards_train/1-w": 3.816521406173706, "rewards_train/2-2": 2.754315137863159, "rewards_train/2-w": 2.142221450805664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.769945740699768, "rewards_train/margins_1": 2.1744829416275024, "rewards_train/margins_2": 0.6120936870574951, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -131.21241760253906, "logps_train/policy_1_l": -192.11534118652344, "logps_train/policy_1_w": -137.29750061035156, "logps_train/policy_2_2": -99.5621109008789, "logps_train/policy_2_w": -183.41714477539062, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.3412587642669678, "rewards_train/1-l": -2.624034881591797, "rewards_train/1-w": 3.471031665802002, "rewards_train/2-2": 2.6697654724121094, "rewards_train/2-w": 1.254965901374817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.095066547393799, "rewards_train/margins_1": 2.129772901535034, "rewards_train/margins_2": 1.4147995710372925, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -127.65459442138672, "logps_train/policy_1_l": -116.74531555175781, "logps_train/policy_1_w": -80.12196350097656, "logps_train/policy_2_2": -93.6250228881836, "logps_train/policy_2_w": -99.11527252197266, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.0626660585403442, "rewards_train/1-l": -1.823946237564087, "rewards_train/1-w": 2.5100693702697754, "rewards_train/2-2": 2.684372901916504, "rewards_train/2-w": 1.9994096755981445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.334015607833862, "rewards_train/margins_1": 1.4474033117294312, "rewards_train/margins_2": 0.6849632263183594, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -247.93222045898438, "logps_train/policy_1_l": -198.66220092773438, "logps_train/policy_1_w": -137.08309936523438, "logps_train/policy_2_2": -203.36651611328125, "logps_train/policy_2_w": -171.03665161132812, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -245.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.2388081550598145, "rewards_train/1-l": -1.793758749961853, "rewards_train/1-w": 3.397158145904541, "rewards_train/2-2": 4.192255973815918, "rewards_train/2-w": 2.049459934234619, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.190916895866394, "rewards_train/margins_1": 1.1583499908447266, "rewards_train/margins_2": 2.142796039581299, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -224.040771484375, "logps_train/policy_1_l": -217.51702880859375, "logps_train/policy_1_w": -231.45413208007812, "logps_train/policy_2_2": -182.38417053222656, "logps_train/policy_2_w": -281.6513671875, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -274.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -306.0, "rewards_train/1-2": 2.042797803878784, "rewards_train/1-l": -2.5642035007476807, "rewards_train/1-w": 4.286229133605957, "rewards_train/2-2": 3.9326767921447754, "rewards_train/2-w": 2.406348705291748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.850432634353638, "rewards_train/margins_1": 2.243431329727173, "rewards_train/margins_2": 1.5263280868530273, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -184.58909606933594, "logps_train/policy_1_l": -314.94342041015625, "logps_train/policy_1_w": -172.803466796875, "logps_train/policy_2_2": -145.99388122558594, "logps_train/policy_2_w": -217.52508544921875, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.1598405838012695, "rewards_train/1-l": -3.513094186782837, "rewards_train/1-w": 3.7509043216705322, "rewards_train/2-2": 3.359987497329712, "rewards_train/2-w": 2.0037412643432617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.263998508453369, "rewards_train/margins_1": 1.5910637378692627, "rewards_train/margins_2": 1.3562462329864502, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -167.59375, "logps_train/policy_1_l": -180.080810546875, "logps_train/policy_1_w": -124.12789916992188, "logps_train/policy_2_2": -135.91539001464844, "logps_train/policy_2_w": -157.2301025390625, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.5273442268371582, "rewards_train/1-l": -2.24362850189209, "rewards_train/1-w": 3.754397392272949, "rewards_train/2-2": 2.690492630004883, "rewards_train/2-w": 2.0801146030426025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.998025894165039, "rewards_train/margins_1": 2.227053165435791, "rewards_train/margins_2": 0.6103780269622803, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -131.4838104248047, "logps_train/policy_1_l": -156.89337158203125, "logps_train/policy_1_w": -121.12841796875, "logps_train/policy_2_2": -102.2211685180664, "logps_train/policy_2_w": -150.9832763671875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.273493766784668, "rewards_train/1-l": -2.292072057723999, "rewards_train/1-w": 2.5437991619110107, "rewards_train/2-2": 2.577882766723633, "rewards_train/2-w": 1.363391637802124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.83587121963501, "rewards_train/margins_1": 1.2703053951263428, "rewards_train/margins_2": 1.2144911289215088, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -212.55572509765625, "logps_train/policy_1_l": -307.9069519042969, "logps_train/policy_1_w": -194.3516845703125, "logps_train/policy_2_2": -156.51809692382812, "logps_train/policy_2_w": -271.35260009765625, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.8569278717041016, "rewards_train/1-l": -3.482102870941162, "rewards_train/1-w": 4.645299911499023, "rewards_train/2-2": 3.4966278076171875, "rewards_train/2-w": 1.789738416671753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.127402782440186, "rewards_train/margins_1": 2.788372039794922, "rewards_train/margins_2": 1.7068893909454346, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -227.9285125732422, "logps_train/policy_1_l": -206.66424560546875, "logps_train/policy_1_w": -170.28134155273438, "logps_train/policy_2_2": -188.94390869140625, "logps_train/policy_2_w": -229.2719268798828, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 2.2227745056152344, "rewards_train/1-l": -2.2868335247039795, "rewards_train/1-w": 4.484365463256836, "rewards_train/2-2": 4.061859607696533, "rewards_train/2-w": 2.0759329795837402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.771198987960815, "rewards_train/margins_1": 2.2615909576416016, "rewards_train/margins_2": 1.985926628112793, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -120.70840454101562, "logps_train/policy_1_l": -135.93807983398438, "logps_train/policy_1_w": -111.50047302246094, "logps_train/policy_2_2": -90.98127746582031, "logps_train/policy_2_w": -139.95538330078125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.369784951210022, "rewards_train/1-l": -1.537362813949585, "rewards_train/1-w": 2.5569839477539062, "rewards_train/2-2": 2.676872491836548, "rewards_train/2-w": 1.4232122898101807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.094346761703491, "rewards_train/margins_1": 1.1871989965438843, "rewards_train/margins_2": 1.2536602020263672, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -207.6305389404297, "logps_train/policy_1_l": -206.79501342773438, "logps_train/policy_1_w": -109.4725341796875, "logps_train/policy_2_2": -159.92584228515625, "logps_train/policy_2_w": -144.5267333984375, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.3760077953338623, "rewards_train/1-l": -2.568563461303711, "rewards_train/1-w": 3.50821590423584, "rewards_train/2-2": 3.445695638656616, "rewards_train/2-w": 1.8371717929840088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.076779365539551, "rewards_train/margins_1": 2.1322081089019775, "rewards_train/margins_2": 1.6085238456726074, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -109.31553649902344, "logps_train/policy_1_l": -142.62759399414062, "logps_train/policy_1_w": -116.74911499023438, "logps_train/policy_2_2": -82.75018310546875, "logps_train/policy_2_w": -158.53208923339844, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.6012585163116455, "rewards_train/1-l": -1.9701815843582153, "rewards_train/1-w": 3.5344631671905518, "rewards_train/2-2": 2.44822359085083, "rewards_train/2-w": 1.423353672027588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.504644751548767, "rewards_train/margins_1": 1.9332046508789062, "rewards_train/margins_2": 1.0248699188232422, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -127.01211547851562, "logps_train/policy_1_l": -161.951171875, "logps_train/policy_1_w": -115.3127212524414, "logps_train/policy_2_2": -95.52693939208984, "logps_train/policy_2_w": -158.94540405273438, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.7245702743530273, "rewards_train/1-l": -1.919140100479126, "rewards_train/1-w": 3.365602970123291, "rewards_train/2-2": 3.102383852005005, "rewards_train/2-w": 1.5265536308288574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.284743070602417, "rewards_train/margins_1": 1.6410326957702637, "rewards_train/margins_2": 1.5758302211761475, "step": 497 }, { "epoch": 1.49, "learning_rate": 8.493155970480074e-07, "loss": 0.3543, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -184.62893676757812, "logps_train/policy_1_l": -163.1729736328125, "logps_train/policy_1_w": -132.7176971435547, "logps_train/policy_2_2": -144.45208740234375, "logps_train/policy_2_w": -174.2529296875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.410544991493225, "rewards_train/1-l": -2.0280394554138184, "rewards_train/1-w": 3.5876059532165527, "rewards_train/2-2": 2.99717378616333, "rewards_train/2-w": 1.9559561014175415, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.615645408630371, "rewards_train/margins_1": 2.1770609617233276, "rewards_train/margins_2": 1.0412176847457886, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -56.929176330566406, "logps_train/policy_1_l": -124.66842651367188, "logps_train/policy_1_w": -41.084861755371094, "logps_train/policy_2_2": -42.64898681640625, "logps_train/policy_2_w": -55.1247673034668, "logps_train/ref_1_2": -68.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -60.25, "logps_train/ref_2_2": -58.5, "logps_train/ref_2_w": -69.0, "rewards_train/1-2": 1.1395041942596436, "rewards_train/1-l": -2.573580503463745, "rewards_train/1-w": 1.9071390628814697, "rewards_train/2-2": 1.5972106456756592, "rewards_train/2-w": 1.3554916381835938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.480719566345215, "rewards_train/margins_1": 0.7676348686218262, "rewards_train/margins_2": 0.24171900749206543, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -114.60569763183594, "logps_train/policy_1_l": -231.4107666015625, "logps_train/policy_1_w": -115.96018981933594, "logps_train/policy_2_2": -79.44837951660156, "logps_train/policy_2_w": -152.2654571533203, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.3390394449234009, "rewards_train/1-l": -2.3349719047546387, "rewards_train/1-w": 2.7672619819641113, "rewards_train/2-2": 2.668052911758423, "rewards_train/2-w": 1.2359540462493896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.10223388671875, "rewards_train/margins_1": 1.4282225370407104, "rewards_train/margins_2": 1.4320988655090332, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -117.78179931640625, "logps_train/policy_1_l": -169.93568420410156, "logps_train/policy_1_w": -104.61575317382812, "logps_train/policy_2_2": -96.65896606445312, "logps_train/policy_2_w": -131.14990234375, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.6936949491500854, "rewards_train/1-l": -2.459193706512451, "rewards_train/1-w": 3.311861991882324, "rewards_train/2-2": 2.843478202819824, "rewards_train/2-w": 2.1424317359924316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.771055698394775, "rewards_train/margins_1": 1.6181670427322388, "rewards_train/margins_2": 0.7010464668273926, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -262.50299072265625, "logps_train/policy_1_l": -236.8434295654297, "logps_train/policy_1_w": -159.98175048828125, "logps_train/policy_2_2": -212.14154052734375, "logps_train/policy_2_w": -208.9071044921875, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 0.9778260588645935, "rewards_train/1-l": -2.812272548675537, "rewards_train/1-w": 3.9244823455810547, "rewards_train/2-2": 3.0553765296936035, "rewards_train/2-w": 1.5475704669952393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.736754894256592, "rewards_train/margins_1": 2.946656286716461, "rewards_train/margins_2": 1.5078060626983643, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -158.85162353515625, "logps_train/policy_1_l": -180.71279907226562, "logps_train/policy_1_w": -136.42796325683594, "logps_train/policy_2_2": -120.34339904785156, "logps_train/policy_2_w": -190.67689514160156, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.3656201362609863, "rewards_train/1-l": -2.2306559085845947, "rewards_train/1-w": 3.267164707183838, "rewards_train/2-2": 3.1482772827148438, "rewards_train/2-w": 1.6984224319458008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.497820615768433, "rewards_train/margins_1": 1.9015445709228516, "rewards_train/margins_2": 1.449854850769043, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -117.31683349609375, "logps_train/policy_1_l": -149.09274291992188, "logps_train/policy_1_w": -100.87799835205078, "logps_train/policy_2_2": -91.12091064453125, "logps_train/policy_2_w": -146.4757537841797, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.6456608772277832, "rewards_train/1-l": -3.043454647064209, "rewards_train/1-w": 3.029387950897217, "rewards_train/2-2": 2.4882993698120117, "rewards_train/2-w": 1.0446124076843262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.072842597961426, "rewards_train/margins_1": 1.3837270736694336, "rewards_train/margins_2": 1.4436869621276855, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -175.50357055664062, "logps_train/policy_1_l": -143.94888305664062, "logps_train/policy_1_w": -74.40138244628906, "logps_train/policy_2_2": -126.08789825439453, "logps_train/policy_2_w": -101.69773864746094, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.1715188026428223, "rewards_train/1-l": -2.230825424194336, "rewards_train/1-w": 2.5942368507385254, "rewards_train/2-2": 3.591209888458252, "rewards_train/2-w": 1.703663945198059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.825062274932861, "rewards_train/margins_1": 1.4227180480957031, "rewards_train/margins_2": 1.8875459432601929, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -226.71697998046875, "logps_train/policy_1_l": -191.85763549804688, "logps_train/policy_1_w": -119.92767333984375, "logps_train/policy_2_2": -163.70989990234375, "logps_train/policy_2_w": -161.5065460205078, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.16697359085083, "rewards_train/1-l": -2.9542202949523926, "rewards_train/1-w": 3.57090425491333, "rewards_train/2-2": 4.03091287612915, "rewards_train/2-w": 2.3168747425079346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.525124549865723, "rewards_train/margins_1": 2.4039306640625, "rewards_train/margins_2": 1.7140381336212158, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -234.42140197753906, "logps_train/policy_1_l": -210.15431213378906, "logps_train/policy_1_w": -137.318603515625, "logps_train/policy_2_2": -192.61538696289062, "logps_train/policy_2_w": -199.56350708007812, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": 0.717234194278717, "rewards_train/1-l": -2.3298840522766113, "rewards_train/1-w": 4.8353271484375, "rewards_train/2-2": 3.0040860176086426, "rewards_train/2-w": 3.285836696624756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 7.165211200714111, "rewards_train/margins_1": 4.118092954158783, "rewards_train/margins_2": -0.2817506790161133, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -102.33434295654297, "logps_train/policy_1_l": -106.3353500366211, "logps_train/policy_1_w": -74.83522033691406, "logps_train/policy_2_2": -71.0703125, "logps_train/policy_2_w": -116.28852844238281, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.3853156566619873, "rewards_train/1-l": -2.5308008193969727, "rewards_train/1-w": 3.0524160861968994, "rewards_train/2-2": 2.4789068698883057, "rewards_train/2-w": 0.6883341073989868, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.583216905593872, "rewards_train/margins_1": 1.667100429534912, "rewards_train/margins_2": 1.7905727624893188, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -131.78509521484375, "logps_train/policy_1_l": -79.42201232910156, "logps_train/policy_1_w": -98.12118530273438, "logps_train/policy_2_2": -109.47335815429688, "logps_train/policy_2_w": -116.6114501953125, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.7355537414550781, "rewards_train/1-l": -1.1309711933135986, "rewards_train/1-w": 2.3833889961242676, "rewards_train/2-2": 2.427664041519165, "rewards_train/2-w": 1.5107301473617554, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.514360189437866, "rewards_train/margins_1": 0.6478352546691895, "rewards_train/margins_2": 0.9169338941574097, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -188.26243591308594, "logps_train/policy_1_l": -153.17391967773438, "logps_train/policy_1_w": -134.38278198242188, "logps_train/policy_2_2": -148.23779296875, "logps_train/policy_2_w": -171.32247924804688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.3034443855285645, "rewards_train/1-l": -1.734968900680542, "rewards_train/1-w": 3.7773473262786865, "rewards_train/2-2": 4.052783012390137, "rewards_train/2-w": 2.230252742767334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.5123162269592285, "rewards_train/margins_1": 1.473902940750122, "rewards_train/margins_2": 1.8225302696228027, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -175.8419952392578, "logps_train/policy_1_l": -250.74749755859375, "logps_train/policy_1_w": -220.65390014648438, "logps_train/policy_2_2": -130.29153442382812, "logps_train/policy_2_w": -268.24334716796875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -296.0, "rewards_train/1-2": 2.029862642288208, "rewards_train/1-l": -2.95131254196167, "rewards_train/1-w": 4.390859603881836, "rewards_train/2-2": 3.847407817840576, "rewards_train/2-w": 2.7069144248962402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.342172145843506, "rewards_train/margins_1": 2.360996961593628, "rewards_train/margins_2": 1.140493392944336, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -168.59629821777344, "logps_train/policy_1_l": -198.54022216796875, "logps_train/policy_1_w": -71.64787292480469, "logps_train/policy_2_2": -121.08985900878906, "logps_train/policy_2_w": -101.53965759277344, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.1763074398040771, "rewards_train/1-l": -3.2278499603271484, "rewards_train/1-w": 2.5992751121520996, "rewards_train/2-2": 3.519139528274536, "rewards_train/2-w": 1.5327521562576294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.827125072479248, "rewards_train/margins_1": 1.4229676723480225, "rewards_train/margins_2": 1.9863873720169067, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -119.77799224853516, "logps_train/policy_1_l": -124.63316345214844, "logps_train/policy_1_w": -84.9083480834961, "logps_train/policy_2_2": -92.5618896484375, "logps_train/policy_2_w": -120.73222351074219, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.3065755367279053, "rewards_train/1-l": -1.659409761428833, "rewards_train/1-w": 2.590806007385254, "rewards_train/2-2": 2.2594356536865234, "rewards_train/2-w": 1.4775581359863281, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.250215768814087, "rewards_train/margins_1": 1.2842304706573486, "rewards_train/margins_2": 0.7818775177001953, "step": 499 }, { "epoch": 1.5, "learning_rate": 8.30847593460069e-07, "loss": 0.3992, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -117.33904266357422, "logps_train/policy_1_l": -137.23350524902344, "logps_train/policy_1_w": -79.33462524414062, "logps_train/policy_2_2": -93.66940307617188, "logps_train/policy_2_w": -96.28828430175781, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 1.287579894065857, "rewards_train/1-l": -2.3796005249023438, "rewards_train/1-w": 2.1548194885253906, "rewards_train/2-2": 2.1943883895874023, "rewards_train/2-w": 1.1422655582427979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.534420013427734, "rewards_train/margins_1": 0.8672395944595337, "rewards_train/margins_2": 1.0521228313446045, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -227.55873107910156, "logps_train/policy_1_l": -149.5686492919922, "logps_train/policy_1_w": -127.91350555419922, "logps_train/policy_2_2": -168.12176513671875, "logps_train/policy_2_w": -177.98797607421875, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.1097513437271118, "rewards_train/1-l": -1.639286756515503, "rewards_train/1-w": 3.591461181640625, "rewards_train/2-2": 3.845245122909546, "rewards_train/2-w": 1.4902644157409668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.230747938156128, "rewards_train/margins_1": 2.481709837913513, "rewards_train/margins_2": 2.354980707168579, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -162.95846557617188, "logps_train/policy_1_l": -128.7975311279297, "logps_train/policy_1_w": -115.27879333496094, "logps_train/policy_2_2": -136.957275390625, "logps_train/policy_2_w": -154.5093994140625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.623781681060791, "rewards_train/1-l": -1.2011393308639526, "rewards_train/1-w": 2.908839702606201, "rewards_train/2-2": 2.898756504058838, "rewards_train/2-w": 1.2818717956542969, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.109979033470154, "rewards_train/margins_1": 1.2850580215454102, "rewards_train/margins_2": 1.616884708404541, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -219.16355895996094, "logps_train/policy_1_l": -238.28125, "logps_train/policy_1_w": -114.90382385253906, "logps_train/policy_2_2": -169.96084594726562, "logps_train/policy_2_w": -153.95455932617188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.2617690563201904, "rewards_train/1-l": -3.340625762939453, "rewards_train/1-w": 3.0924301147460938, "rewards_train/2-2": 3.4570412635803223, "rewards_train/2-w": 1.813919186592102, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.433055877685547, "rewards_train/margins_1": 1.8306610584259033, "rewards_train/margins_2": 1.6431220769882202, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -98.08895111083984, "logps_train/policy_1_l": -161.2637176513672, "logps_train/policy_1_w": -121.6338882446289, "logps_train/policy_2_2": -77.47994232177734, "logps_train/policy_2_w": -157.70877075195312, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.1239171028137207, "rewards_train/1-l": -2.942094087600708, "rewards_train/1-w": 3.4084858894348145, "rewards_train/2-2": 2.7559118270874023, "rewards_train/2-w": 1.737716555595398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.3505799770355225, "rewards_train/margins_1": 1.2845687866210938, "rewards_train/margins_2": 1.0181952714920044, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -110.2515869140625, "logps_train/policy_1_l": -119.4283218383789, "logps_train/policy_1_w": -90.17594909667969, "logps_train/policy_2_2": -79.6925277709961, "logps_train/policy_2_w": -132.3336944580078, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.2732787132263184, "rewards_train/1-l": -1.9750590324401855, "rewards_train/1-w": 3.090999126434326, "rewards_train/2-2": 2.340902805328369, "rewards_train/2-w": 1.4791300296783447, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.066058158874512, "rewards_train/margins_1": 1.8177204132080078, "rewards_train/margins_2": 0.8617727756500244, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -97.95272064208984, "logps_train/policy_1_l": -126.79252624511719, "logps_train/policy_1_w": -64.66617584228516, "logps_train/policy_2_2": -77.62693786621094, "logps_train/policy_2_w": -90.368408203125, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -103.5, "rewards_train/1-2": 1.360196828842163, "rewards_train/1-l": -1.5304253101348877, "rewards_train/1-w": 2.3880696296691895, "rewards_train/2-2": 2.295900344848633, "rewards_train/2-w": 1.305737018585205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.918494939804077, "rewards_train/margins_1": 1.0278728008270264, "rewards_train/margins_2": 0.9901633262634277, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -205.0819091796875, "logps_train/policy_1_l": -218.75457763671875, "logps_train/policy_1_w": -175.40310668945312, "logps_train/policy_2_2": -163.51654052734375, "logps_train/policy_2_w": -217.7908935546875, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.3996214866638184, "rewards_train/1-l": -3.344208240509033, "rewards_train/1-w": 3.0760960578918457, "rewards_train/2-2": 4.006157875061035, "rewards_train/2-w": 1.6552848815917969, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.420304298400879, "rewards_train/margins_1": 0.6764745712280273, "rewards_train/margins_2": 2.3508729934692383, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -142.78143310546875, "logps_train/policy_1_l": -170.43502807617188, "logps_train/policy_1_w": -169.81576538085938, "logps_train/policy_2_2": -102.0446548461914, "logps_train/policy_2_w": -236.34796142578125, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 0.9486145377159119, "rewards_train/1-l": -1.380221962928772, "rewards_train/1-w": 3.7246742248535156, "rewards_train/2-2": 2.417604923248291, "rewards_train/2-w": 1.093329906463623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.104896187782288, "rewards_train/margins_1": 2.7760596871376038, "rewards_train/margins_2": 1.324275016784668, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -127.76499938964844, "logps_train/policy_1_l": -123.84573364257812, "logps_train/policy_1_w": -37.93351745605469, "logps_train/policy_2_2": -89.44068145751953, "logps_train/policy_2_w": -67.64096069335938, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -57.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 1.0031874179840088, "rewards_train/1-l": -2.7783236503601074, "rewards_train/1-w": 1.8933669328689575, "rewards_train/2-2": 2.6403069496154785, "rewards_train/2-w": 1.095279574394226, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.671690583229065, "rewards_train/margins_1": 0.8901795148849487, "rewards_train/margins_2": 1.5450273752212524, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -207.8204345703125, "logps_train/policy_1_l": -169.2400665283203, "logps_train/policy_1_w": -145.61041259765625, "logps_train/policy_2_2": -157.08316040039062, "logps_train/policy_2_w": -204.61459350585938, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 0.15702080726623535, "rewards_train/1-l": -1.9935383796691895, "rewards_train/1-w": 3.0967721939086914, "rewards_train/2-2": 1.8733247518539429, "rewards_train/2-w": 1.1010406017303467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.090310573577881, "rewards_train/margins_1": 2.939751386642456, "rewards_train/margins_2": 0.7722841501235962, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -88.95291137695312, "logps_train/policy_1_l": -149.344482421875, "logps_train/policy_1_w": -136.29525756835938, "logps_train/policy_2_2": -65.90058135986328, "logps_train/policy_2_w": -184.38177490234375, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.4687715768814087, "rewards_train/1-l": -2.0500736236572266, "rewards_train/1-w": 3.312662124633789, "rewards_train/2-2": 2.4763481616973877, "rewards_train/2-w": 0.8008838891983032, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.362735748291016, "rewards_train/margins_1": 1.8438905477523804, "rewards_train/margins_2": 1.6754642724990845, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -157.95164489746094, "logps_train/policy_1_l": -154.781982421875, "logps_train/policy_1_w": -113.39556884765625, "logps_train/policy_2_2": -133.10787963867188, "logps_train/policy_2_w": -151.59852600097656, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.1907730102539062, "rewards_train/1-l": -1.8272206783294678, "rewards_train/1-w": 2.383099317550659, "rewards_train/2-2": 2.3407750129699707, "rewards_train/2-w": 0.9245222806930542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.210319995880127, "rewards_train/margins_1": 1.192326307296753, "rewards_train/margins_2": 1.4162527322769165, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -157.44439697265625, "logps_train/policy_1_l": -253.12081909179688, "logps_train/policy_1_w": -134.73016357421875, "logps_train/policy_2_2": -122.87760162353516, "logps_train/policy_2_w": -179.37899780273438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.6196227073669434, "rewards_train/1-l": -3.557199001312256, "rewards_train/1-w": 3.5711236000061035, "rewards_train/2-2": 2.9622397422790527, "rewards_train/2-w": 1.721474289894104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.128322601318359, "rewards_train/margins_1": 1.9515008926391602, "rewards_train/margins_2": 1.2407654523849487, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -120.38154602050781, "logps_train/policy_1_l": -129.29937744140625, "logps_train/policy_1_w": -79.98611450195312, "logps_train/policy_2_2": -82.34477996826172, "logps_train/policy_2_w": -114.64889526367188, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.7395796775817871, "rewards_train/1-l": -2.125739097595215, "rewards_train/1-w": 2.8717007637023926, "rewards_train/2-2": 2.27821683883667, "rewards_train/2-w": 1.6444854736328125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.997439861297607, "rewards_train/margins_1": 2.1321210861206055, "rewards_train/margins_2": 0.6337313652038574, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -212.15097045898438, "logps_train/policy_1_l": -165.38552856445312, "logps_train/policy_1_w": -99.26170349121094, "logps_train/policy_2_2": -159.56915283203125, "logps_train/policy_2_w": -135.49703979492188, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.2739653587341309, "rewards_train/1-l": -1.9147248268127441, "rewards_train/1-w": 2.9800796508789062, "rewards_train/2-2": 3.275897979736328, "rewards_train/2-w": 1.773343563079834, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.89480447769165, "rewards_train/margins_1": 1.7061142921447754, "rewards_train/margins_2": 1.5025544166564941, "step": 501 }, { "epoch": 1.5, "learning_rate": 8.125424962044742e-07, "loss": 0.4761, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -131.36231994628906, "logps_train/policy_1_l": -167.8797607421875, "logps_train/policy_1_w": -139.98556518554688, "logps_train/policy_2_2": -102.12956237792969, "logps_train/policy_2_w": -179.18130493164062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.159080147743225, "rewards_train/1-l": -2.1418826580047607, "rewards_train/1-w": 2.788163423538208, "rewards_train/2-2": 2.2815752029418945, "rewards_train/2-w": 1.4435880184173584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.930046081542969, "rewards_train/margins_1": 1.629083275794983, "rewards_train/margins_2": 0.8379871845245361, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -185.66744995117188, "logps_train/policy_1_l": -250.90032958984375, "logps_train/policy_1_w": -220.7947235107422, "logps_train/policy_2_2": -141.84169006347656, "logps_train/policy_2_w": -292.92236328125, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 2.345754623413086, "rewards_train/1-l": -2.818157196044922, "rewards_train/1-w": 4.751777648925781, "rewards_train/2-2": 3.4595823287963867, "rewards_train/2-w": 1.4202618598937988, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.569934844970703, "rewards_train/margins_1": 2.4060230255126953, "rewards_train/margins_2": 2.039320468902588, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -164.81973266601562, "logps_train/policy_1_l": -255.59207153320312, "logps_train/policy_1_w": -180.493896484375, "logps_train/policy_2_2": -124.3644790649414, "logps_train/policy_2_w": -227.1593780517578, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 1.5195894241333008, "rewards_train/1-l": -2.677955389022827, "rewards_train/1-w": 3.399829626083374, "rewards_train/2-2": 3.275270700454712, "rewards_train/2-w": 1.7668735980987549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.077785015106201, "rewards_train/margins_1": 1.8802402019500732, "rewards_train/margins_2": 1.508397102355957, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -232.19747924804688, "logps_train/policy_1_l": -305.468994140625, "logps_train/policy_1_w": -185.28759765625, "logps_train/policy_2_2": -190.81602478027344, "logps_train/policy_2_w": -241.61354064941406, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.8255642652511597, "rewards_train/1-l": -3.381958484649658, "rewards_train/1-w": 3.904052257537842, "rewards_train/2-2": 3.573085069656372, "rewards_train/2-w": 1.619895577430725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.2860107421875, "rewards_train/margins_1": 2.078487992286682, "rewards_train/margins_2": 1.953189492225647, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -204.16302490234375, "logps_train/policy_1_l": -197.09068298339844, "logps_train/policy_1_w": -164.0004119873047, "logps_train/policy_2_2": -171.2374267578125, "logps_train/policy_2_w": -198.66744995117188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.7774481773376465, "rewards_train/1-l": -2.106334924697876, "rewards_train/1-w": 4.018708229064941, "rewards_train/2-2": 4.0106329917907715, "rewards_train/2-w": 2.38325572013855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.125043153762817, "rewards_train/margins_1": 1.241260051727295, "rewards_train/margins_2": 1.6273772716522217, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -182.6461639404297, "logps_train/policy_1_l": -132.67987060546875, "logps_train/policy_1_w": -117.01441955566406, "logps_train/policy_2_2": -142.85354614257812, "logps_train/policy_2_w": -165.98912048339844, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.4135081768035889, "rewards_train/1-l": -2.302558183670044, "rewards_train/1-w": 3.632932662963867, "rewards_train/2-2": 3.061521053314209, "rewards_train/2-w": 1.6885877847671509, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.935490846633911, "rewards_train/margins_1": 2.2194244861602783, "rewards_train/margins_2": 1.372933268547058, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -220.91160583496094, "logps_train/policy_1_l": -144.02001953125, "logps_train/policy_1_w": -132.77499389648438, "logps_train/policy_2_2": -178.42120361328125, "logps_train/policy_2_w": -169.84022521972656, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.7150894999504089, "rewards_train/1-l": -1.4402823448181152, "rewards_train/1-w": 3.018984794616699, "rewards_train/2-2": 2.8059275150299072, "rewards_train/2-w": 1.3714473247528076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4592671394348145, "rewards_train/margins_1": 2.3038952946662903, "rewards_train/margins_2": 1.4344801902770996, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -120.82501220703125, "logps_train/policy_1_l": -124.69314575195312, "logps_train/policy_1_w": -109.0452880859375, "logps_train/policy_2_2": -99.91853332519531, "logps_train/policy_2_w": -143.2117156982422, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.641327142715454, "rewards_train/1-l": -2.06821608543396, "rewards_train/1-w": 2.8419554233551025, "rewards_train/2-2": 2.662443161010742, "rewards_train/2-w": 1.3362505435943604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.9101715087890625, "rewards_train/margins_1": 1.2006282806396484, "rewards_train/margins_2": 1.3261926174163818, "step": 502 }, { "epoch": 1.51, "logps_train/policy_1_2": -200.19810485839844, "logps_train/policy_1_l": -192.97752380371094, "logps_train/policy_1_w": -127.25579833984375, "logps_train/policy_2_2": -164.24769592285156, "logps_train/policy_2_w": -164.88937377929688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.1708149909973145, "rewards_train/1-l": -3.045017719268799, "rewards_train/1-w": 3.9763731956481934, "rewards_train/2-2": 2.6295268535614014, "rewards_train/2-w": 2.4149670600891113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.021390914916992, "rewards_train/margins_1": 2.805558204650879, "rewards_train/margins_2": 0.21455979347229004, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -159.2109832763672, "logps_train/policy_1_l": -154.76914978027344, "logps_train/policy_1_w": -89.58491516113281, "logps_train/policy_2_2": -121.17289733886719, "logps_train/policy_2_w": -119.08755493164062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.9554640054702759, "rewards_train/1-l": -2.4995713233947754, "rewards_train/1-w": 3.5165088176727295, "rewards_train/2-2": 3.3444294929504395, "rewards_train/2-w": 2.447495222091675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.016080141067505, "rewards_train/margins_1": 1.5610448122024536, "rewards_train/margins_2": 0.8969342708587646, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -275.17120361328125, "logps_train/policy_1_l": -204.7389373779297, "logps_train/policy_1_w": -145.96038818359375, "logps_train/policy_2_2": -224.41822814941406, "logps_train/policy_2_w": -185.09304809570312, "logps_train/ref_1_2": -300.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -270.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.370380163192749, "rewards_train/1-l": -3.3176443576812744, "rewards_train/1-w": 4.410210132598877, "rewards_train/2-2": 4.620678424835205, "rewards_train/2-w": 2.7594451904296875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.727854490280151, "rewards_train/margins_1": 2.039829969406128, "rewards_train/margins_2": 1.8612332344055176, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -102.47352600097656, "logps_train/policy_1_l": -91.29097747802734, "logps_train/policy_1_w": -65.23777770996094, "logps_train/policy_2_2": -76.94696044921875, "logps_train/policy_2_w": -92.21560668945312, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": 1.176865816116333, "rewards_train/1-l": -0.9037069082260132, "rewards_train/1-w": 1.8250503540039062, "rewards_train/2-2": 1.98421049118042, "rewards_train/2-w": 0.7194555997848511, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7287572622299194, "rewards_train/margins_1": 0.6481845378875732, "rewards_train/margins_2": 1.2647548913955688, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -155.92523193359375, "logps_train/policy_1_l": -214.16567993164062, "logps_train/policy_1_w": -120.20243835449219, "logps_train/policy_2_2": -132.17442321777344, "logps_train/policy_2_w": -142.91702270507812, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.458061933517456, "rewards_train/1-l": -1.5489903688430786, "rewards_train/1-w": 2.395967960357666, "rewards_train/2-2": 2.34915828704834, "rewards_train/2-w": 1.4995086193084717, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9449583292007446, "rewards_train/margins_1": 0.93790602684021, "rewards_train/margins_2": 0.8496496677398682, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -207.48284912109375, "logps_train/policy_1_l": -233.85621643066406, "logps_train/policy_1_w": -164.95150756835938, "logps_train/policy_2_2": -162.94696044921875, "logps_train/policy_2_w": -214.78445434570312, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 2.537651777267456, "rewards_train/1-l": -2.5348410606384277, "rewards_train/1-w": 3.6970362663269043, "rewards_train/2-2": 4.5295233726501465, "rewards_train/2-w": 1.0738983154296875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.231877326965332, "rewards_train/margins_1": 1.1593844890594482, "rewards_train/margins_2": 3.455625057220459, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -161.19305419921875, "logps_train/policy_1_l": -154.58853149414062, "logps_train/policy_1_w": -135.14816284179688, "logps_train/policy_2_2": -136.99989318847656, "logps_train/policy_2_w": -178.53038024902344, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2291316986083984, "rewards_train/1-l": -1.6670563220977783, "rewards_train/1-w": 3.233816146850586, "rewards_train/2-2": 2.186534881591797, "rewards_train/2-w": 1.5102441310882568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.900872468948364, "rewards_train/margins_1": 2.0046844482421875, "rewards_train/margins_2": 0.67629075050354, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -204.12051391601562, "logps_train/policy_1_l": -194.6975555419922, "logps_train/policy_1_w": -200.80255126953125, "logps_train/policy_2_2": -157.740234375, "logps_train/policy_2_w": -259.0808410644531, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 2.4414634704589844, "rewards_train/1-l": -2.5010054111480713, "rewards_train/1-w": 3.724433183670044, "rewards_train/2-2": 4.146679878234863, "rewards_train/2-w": 1.461641550064087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.225438594818115, "rewards_train/margins_1": 1.2829697132110596, "rewards_train/margins_2": 2.6850383281707764, "step": 503 }, { "epoch": 1.51, "learning_rate": 7.944020918264458e-07, "loss": 0.4117, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -110.9346923828125, "logps_train/policy_1_l": -87.04177856445312, "logps_train/policy_1_w": -93.99153137207031, "logps_train/policy_2_2": -83.86284637451172, "logps_train/policy_2_w": -126.67967987060547, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -74.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.1893433332443237, "rewards_train/1-l": -1.293825626373291, "rewards_train/1-w": 3.0383477210998535, "rewards_train/2-2": 2.3445749282836914, "rewards_train/2-w": 1.7996110916137695, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3321733474731445, "rewards_train/margins_1": 1.8490043878555298, "rewards_train/margins_2": 0.5449638366699219, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -168.53787231445312, "logps_train/policy_1_l": -193.28404235839844, "logps_train/policy_1_w": -118.4748764038086, "logps_train/policy_2_2": -116.12458801269531, "logps_train/policy_2_w": -157.02293395996094, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.9946509003639221, "rewards_train/1-l": -2.849888801574707, "rewards_train/1-w": 2.5958714485168457, "rewards_train/2-2": 3.0703539848327637, "rewards_train/2-w": 1.3016130924224854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.445760250091553, "rewards_train/margins_1": 1.6012205481529236, "rewards_train/margins_2": 1.7687408924102783, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -200.96096801757812, "logps_train/policy_1_l": -221.75357055664062, "logps_train/policy_1_w": -126.08331298828125, "logps_train/policy_2_2": -146.2803955078125, "logps_train/policy_2_w": -174.869140625, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.6414021253585815, "rewards_train/1-l": -3.7722325325012207, "rewards_train/1-w": 3.158660650253296, "rewards_train/2-2": 3.7344610691070557, "rewards_train/2-w": 1.585546612739563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.930893182754517, "rewards_train/margins_1": 1.5172585248947144, "rewards_train/margins_2": 2.1489144563674927, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -106.40396118164062, "logps_train/policy_1_l": -108.85211181640625, "logps_train/policy_1_w": -104.85127258300781, "logps_train/policy_2_2": -77.00048828125, "logps_train/policy_2_w": -148.98486328125, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.02522873878479, "rewards_train/1-l": -1.646929383277893, "rewards_train/1-w": 3.117997646331787, "rewards_train/2-2": 1.9671382904052734, "rewards_train/2-w": 1.0733890533447266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.76492702960968, "rewards_train/margins_1": 2.092768907546997, "rewards_train/margins_2": 0.8937492370605469, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -189.6370849609375, "logps_train/policy_1_l": -150.13674926757812, "logps_train/policy_1_w": -135.90774536132812, "logps_train/policy_2_2": -135.56036376953125, "logps_train/policy_2_w": -187.99957275390625, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.592540979385376, "rewards_train/1-l": -1.7832072973251343, "rewards_train/1-w": 3.210787773132324, "rewards_train/2-2": 3.5439646244049072, "rewards_train/2-w": 1.1766047477722168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.9939950704574585, "rewards_train/margins_1": 1.6182467937469482, "rewards_train/margins_2": 2.3673598766326904, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -117.1773452758789, "logps_train/policy_1_l": -110.55842590332031, "logps_train/policy_1_w": -58.21045684814453, "logps_train/policy_2_2": -87.0414047241211, "logps_train/policy_2_w": -72.6971435546875, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.1291406154632568, "rewards_train/1-l": -1.7985788583755493, "rewards_train/1-w": 1.8405143022537231, "rewards_train/2-2": 2.4044532775878906, "rewards_train/2-w": 1.2236450910568237, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6390931606292725, "rewards_train/margins_1": 0.7113736867904663, "rewards_train/margins_2": 1.180808186531067, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -160.36557006835938, "logps_train/policy_1_l": -119.21229553222656, "logps_train/policy_1_w": -92.12327575683594, "logps_train/policy_2_2": -127.75900268554688, "logps_train/policy_2_w": -130.7401885986328, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 2.0446937084198, "rewards_train/1-l": -2.064979314804077, "rewards_train/1-w": 3.4189229011535645, "rewards_train/2-2": 3.3381619453430176, "rewards_train/2-w": 1.8228566646575928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.483902215957642, "rewards_train/margins_1": 1.3742291927337646, "rewards_train/margins_2": 1.5153052806854248, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -175.0115203857422, "logps_train/policy_1_l": -275.45489501953125, "logps_train/policy_1_w": -182.59445190429688, "logps_train/policy_2_2": -126.67027282714844, "logps_train/policy_2_w": -249.92892456054688, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.5050978660583496, "rewards_train/1-l": -5.1058430671691895, "rewards_train/1-w": 3.719851016998291, "rewards_train/2-2": 3.3517227172851562, "rewards_train/2-w": 1.252420425415039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.82569408416748, "rewards_train/margins_1": 2.2147531509399414, "rewards_train/margins_2": 2.099302291870117, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -157.0841064453125, "logps_train/policy_1_l": -147.0911102294922, "logps_train/policy_1_w": -85.18875122070312, "logps_train/policy_2_2": -111.39031982421875, "logps_train/policy_2_w": -118.99653625488281, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.0587778091430664, "rewards_train/1-l": -1.2557909488677979, "rewards_train/1-w": 2.902218818664551, "rewards_train/2-2": 2.8492493629455566, "rewards_train/2-w": 1.466752529144287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.158009767532349, "rewards_train/margins_1": 1.8434410095214844, "rewards_train/margins_2": 1.3824968338012695, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -215.101806640625, "logps_train/policy_1_l": -316.49267578125, "logps_train/policy_1_w": -187.73202514648438, "logps_train/policy_2_2": -167.07376098632812, "logps_train/policy_2_w": -257.6152648925781, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -290.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.236694812774658, "rewards_train/1-l": -2.6641104221343994, "rewards_train/1-w": 3.6119537353515625, "rewards_train/2-2": 3.7301230430603027, "rewards_train/2-w": 0.9314415454864502, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.276064157485962, "rewards_train/margins_1": 1.3752589225769043, "rewards_train/margins_2": 2.7986814975738525, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -158.17874145507812, "logps_train/policy_1_l": -155.98477172851562, "logps_train/policy_1_w": -140.65231323242188, "logps_train/policy_2_2": -123.57658386230469, "logps_train/policy_2_w": -184.65724182128906, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.9290004968643188, "rewards_train/1-l": -1.5508216619491577, "rewards_train/1-w": 4.031644344329834, "rewards_train/2-2": 3.2673416137695312, "rewards_train/2-w": 1.9053699970245361, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.582466006278992, "rewards_train/margins_1": 2.102643847465515, "rewards_train/margins_2": 1.3619716167449951, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -135.1802215576172, "logps_train/policy_1_l": -113.9405517578125, "logps_train/policy_1_w": -61.651397705078125, "logps_train/policy_2_2": -108.0075454711914, "logps_train/policy_2_w": -92.00243377685547, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.6485791206359863, "rewards_train/1-l": -1.1196414232254028, "rewards_train/1-w": 2.700875759124756, "rewards_train/2-2": 2.988992214202881, "rewards_train/2-w": 1.5778818130493164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8205171823501587, "rewards_train/margins_1": 1.0522966384887695, "rewards_train/margins_2": 1.4111104011535645, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -156.4471893310547, "logps_train/policy_1_l": -178.90830993652344, "logps_train/policy_1_w": -169.44381713867188, "logps_train/policy_2_2": -126.0013198852539, "logps_train/policy_2_w": -195.27789306640625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.7326250076293945, "rewards_train/1-l": -2.4573099613189697, "rewards_train/1-w": 2.930227756500244, "rewards_train/2-2": 2.7772116661071777, "rewards_train/2-w": 1.836859941482544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.387537717819214, "rewards_train/margins_1": 1.1976027488708496, "rewards_train/margins_2": 0.9403517246246338, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -179.88333129882812, "logps_train/policy_1_l": -120.58589172363281, "logps_train/policy_1_w": -109.89781188964844, "logps_train/policy_2_2": -110.64054870605469, "logps_train/policy_2_w": -153.51861572265625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.8272926807403564, "rewards_train/1-l": -1.2335891723632812, "rewards_train/1-w": 3.3945937156677246, "rewards_train/2-2": 3.297664165496826, "rewards_train/2-w": 1.6309505701065063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.628182888031006, "rewards_train/margins_1": 2.567301034927368, "rewards_train/margins_2": 1.6667135953903198, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -115.18677520751953, "logps_train/policy_1_l": -155.63702392578125, "logps_train/policy_1_w": -95.64305114746094, "logps_train/policy_2_2": -85.70165252685547, "logps_train/policy_2_w": -125.10813903808594, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.3629628419876099, "rewards_train/1-l": -2.02522611618042, "rewards_train/1-w": 3.0493669509887695, "rewards_train/2-2": 2.585303544998169, "rewards_train/2-w": 1.7930920124053955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0745930671691895, "rewards_train/margins_1": 1.6864041090011597, "rewards_train/margins_2": 0.7922115325927734, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -166.01336669921875, "logps_train/policy_1_l": -204.38491821289062, "logps_train/policy_1_w": -118.31800079345703, "logps_train/policy_2_2": -136.06399536132812, "logps_train/policy_2_w": -155.30018615722656, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.5799143314361572, "rewards_train/1-l": -3.236929178237915, "rewards_train/1-w": 4.26194953918457, "rewards_train/2-2": 4.093601226806641, "rewards_train/2-w": 2.944981813430786, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.498878717422485, "rewards_train/margins_1": 1.682035207748413, "rewards_train/margins_2": 1.1486194133758545, "step": 505 }, { "epoch": 1.51, "learning_rate": 7.764281507974711e-07, "loss": 0.3504, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -113.72879028320312, "logps_train/policy_1_l": -139.83201599121094, "logps_train/policy_1_w": -121.32882690429688, "logps_train/policy_2_2": -78.93209838867188, "logps_train/policy_2_w": -175.04690551757812, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.1536829471588135, "rewards_train/1-l": -1.529832124710083, "rewards_train/1-w": 3.245242118835449, "rewards_train/2-2": 2.4739773273468018, "rewards_train/2-w": 1.3578089475631714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.775074243545532, "rewards_train/margins_1": 2.0915591716766357, "rewards_train/margins_2": 1.1161683797836304, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -104.44009399414062, "logps_train/policy_1_l": -107.4901123046875, "logps_train/policy_1_w": -62.261322021484375, "logps_train/policy_2_2": -77.47157287597656, "logps_train/policy_2_w": -87.87396240234375, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.0805752277374268, "rewards_train/1-l": -1.1472536325454712, "rewards_train/1-w": 2.8129305839538574, "rewards_train/2-2": 2.4943466186523438, "rewards_train/2-w": 1.9626030921936035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9601842164993286, "rewards_train/margins_1": 1.7323553562164307, "rewards_train/margins_2": 0.5317435264587402, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -161.6208953857422, "logps_train/policy_1_l": -232.40573120117188, "logps_train/policy_1_w": -131.00245666503906, "logps_train/policy_2_2": -120.18611145019531, "logps_train/policy_2_w": -181.21087646484375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.0871293544769287, "rewards_train/1-l": -3.7780728340148926, "rewards_train/1-w": 3.2450666427612305, "rewards_train/2-2": 2.7985754013061523, "rewards_train/2-w": 1.4289124011993408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.023139476776123, "rewards_train/margins_1": 2.1579372882843018, "rewards_train/margins_2": 1.3696630001068115, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -157.90219116210938, "logps_train/policy_1_l": -173.08163452148438, "logps_train/policy_1_w": -103.50083923339844, "logps_train/policy_2_2": -122.3093032836914, "logps_train/policy_2_w": -137.09744262695312, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.1941558122634888, "rewards_train/1-l": -2.3001561164855957, "rewards_train/1-w": 2.9811668395996094, "rewards_train/2-2": 2.972194194793701, "rewards_train/2-w": 1.6355693340301514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.281322956085205, "rewards_train/margins_1": 1.7870110273361206, "rewards_train/margins_2": 1.3366248607635498, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -126.72373962402344, "logps_train/policy_1_l": -102.34634399414062, "logps_train/policy_1_w": -57.66343688964844, "logps_train/policy_2_2": -102.52471160888672, "logps_train/policy_2_w": -77.16474151611328, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 1.2963755130767822, "rewards_train/1-l": -1.53775954246521, "rewards_train/1-w": 2.059046983718872, "rewards_train/2-2": 2.474872589111328, "rewards_train/2-w": 1.3030567169189453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.596806526184082, "rewards_train/margins_1": 0.7626714706420898, "rewards_train/margins_2": 1.1718158721923828, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -107.28485107421875, "logps_train/policy_1_l": -98.70645141601562, "logps_train/policy_1_w": -87.42805480957031, "logps_train/policy_2_2": -77.9340591430664, "logps_train/policy_2_w": -105.55413818359375, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8722964525222778, "rewards_train/1-l": -1.5140042304992676, "rewards_train/1-w": 2.761101484298706, "rewards_train/2-2": 2.125734806060791, "rewards_train/2-w": 1.7797428369522095, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.275105714797974, "rewards_train/margins_1": 1.8888050317764282, "rewards_train/margins_2": 0.34599196910858154, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -167.46243286132812, "logps_train/policy_1_l": -168.6393585205078, "logps_train/policy_1_w": -97.2812728881836, "logps_train/policy_2_2": -134.63595581054688, "logps_train/policy_2_w": -120.08615112304688, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.94281804561615, "rewards_train/1-l": -2.460810661315918, "rewards_train/1-w": 2.262498140335083, "rewards_train/2-2": 3.1520299911499023, "rewards_train/2-w": 1.3773223161697388, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.723308801651001, "rewards_train/margins_1": 0.3196800947189331, "rewards_train/margins_2": 1.7747076749801636, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -178.99208068847656, "logps_train/policy_1_l": -184.5857696533203, "logps_train/policy_1_w": -159.11605834960938, "logps_train/policy_2_2": -137.1102752685547, "logps_train/policy_2_w": -222.6085205078125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": 1.8226662874221802, "rewards_train/1-l": -2.8906075954437256, "rewards_train/1-w": 4.236830711364746, "rewards_train/2-2": 3.2983479499816895, "rewards_train/2-w": 1.4047739505767822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.127438306808472, "rewards_train/margins_1": 2.414164423942566, "rewards_train/margins_2": 1.8935739994049072, "step": 506 }, { "epoch": 1.52, "logps_train/policy_1_2": -201.30299377441406, "logps_train/policy_1_l": -192.19497680664062, "logps_train/policy_1_w": -191.52752685546875, "logps_train/policy_2_2": -153.37960815429688, "logps_train/policy_2_w": -254.21922302246094, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 1.960324764251709, "rewards_train/1-l": -2.439810276031494, "rewards_train/1-w": 4.915996551513672, "rewards_train/2-2": 3.68078875541687, "rewards_train/2-w": 2.139014959335327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.355806827545166, "rewards_train/margins_1": 2.955671787261963, "rewards_train/margins_2": 1.541773796081543, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -220.53408813476562, "logps_train/policy_1_l": -240.63868713378906, "logps_train/policy_1_w": -206.15377807617188, "logps_train/policy_2_2": -173.91665649414062, "logps_train/policy_2_w": -248.98768615722656, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.5778398513793945, "rewards_train/1-l": -2.8802757263183594, "rewards_train/1-w": 3.0012238025665283, "rewards_train/2-2": 3.4559903144836426, "rewards_train/2-w": 1.0387310981750488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.881499528884888, "rewards_train/margins_1": 1.4233839511871338, "rewards_train/margins_2": 2.4172592163085938, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -84.46217346191406, "logps_train/policy_1_l": -66.73291778564453, "logps_train/policy_1_w": -37.26914978027344, "logps_train/policy_2_2": -69.15827941894531, "logps_train/policy_2_w": -53.13665771484375, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -53.75, "logps_train/ref_1_w": -51.25, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -58.0, "rewards_train/1-2": 0.6549547910690308, "rewards_train/1-l": -1.2947757244110107, "rewards_train/1-w": 1.4062881469726562, "rewards_train/2-2": 1.2654218673706055, "rewards_train/2-w": 0.47695931792259216, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.701063871383667, "rewards_train/margins_1": 0.7513333559036255, "rewards_train/margins_2": 0.7884625494480133, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -172.39443969726562, "logps_train/policy_1_l": -225.04248046875, "logps_train/policy_1_w": -152.79977416992188, "logps_train/policy_2_2": -122.98138427734375, "logps_train/policy_2_w": -227.84703063964844, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.2980560064315796, "rewards_train/1-l": -3.166064500808716, "rewards_train/1-w": 4.145021438598633, "rewards_train/2-2": 3.0924859046936035, "rewards_train/2-w": 1.0277972221374512, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.311085939407349, "rewards_train/margins_1": 2.8469654321670532, "rewards_train/margins_2": 2.0646886825561523, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -92.79490661621094, "logps_train/policy_1_l": -89.83677673339844, "logps_train/policy_1_w": -76.26872253417969, "logps_train/policy_2_2": -76.2021484375, "logps_train/policy_2_w": -88.18438720703125, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.347071886062622, "rewards_train/1-l": -1.0414907932281494, "rewards_train/1-w": 2.231379985809326, "rewards_train/2-2": 2.248046875, "rewards_train/2-w": 1.582879662513733, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2728707790374756, "rewards_train/margins_1": 0.8843080997467041, "rewards_train/margins_2": 0.6651672124862671, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -237.41661071777344, "logps_train/policy_1_l": -223.1405792236328, "logps_train/policy_1_w": -199.69070434570312, "logps_train/policy_2_2": -184.14686584472656, "logps_train/policy_2_w": -238.36474609375, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.5802156925201416, "rewards_train/1-l": -1.6863224506378174, "rewards_train/1-w": 3.9473352432250977, "rewards_train/2-2": 4.5603132247924805, "rewards_train/2-w": 2.244776725769043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.633657693862915, "rewards_train/margins_1": 1.367119550704956, "rewards_train/margins_2": 2.3155364990234375, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -158.28875732421875, "logps_train/policy_1_l": -180.01025390625, "logps_train/policy_1_w": -93.31501770019531, "logps_train/policy_2_2": -128.68374633789062, "logps_train/policy_2_w": -118.32191467285156, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.636603593826294, "rewards_train/1-l": -2.6574206352233887, "rewards_train/1-w": 2.3415451049804688, "rewards_train/2-2": 2.549618721008301, "rewards_train/2-w": 1.628746747970581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.998965740203857, "rewards_train/margins_1": 0.7049415111541748, "rewards_train/margins_2": 0.9208719730377197, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -107.98054504394531, "logps_train/policy_1_l": -151.63922119140625, "logps_train/policy_1_w": -83.73777770996094, "logps_train/policy_2_2": -90.63592529296875, "logps_train/policy_2_w": -110.15821838378906, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.2949146032333374, "rewards_train/1-l": -2.1248109340667725, "rewards_train/1-w": 2.377784252166748, "rewards_train/2-2": 2.220782518386841, "rewards_train/2-w": 1.4154282808303833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5025951862335205, "rewards_train/margins_1": 1.0828696489334106, "rewards_train/margins_2": 0.8053542375564575, "step": 507 }, { "epoch": 1.52, "learning_rate": 7.586224273425083e-07, "loss": 0.4468, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -240.36802673339844, "logps_train/policy_1_l": -179.85926818847656, "logps_train/policy_1_w": -134.23419189453125, "logps_train/policy_2_2": -178.55392456054688, "logps_train/policy_2_w": -182.27761840820312, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.480384349822998, "rewards_train/1-l": -2.5851449966430664, "rewards_train/1-w": 3.5820493698120117, "rewards_train/2-2": 4.325858116149902, "rewards_train/2-w": 1.8331761360168457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.167194366455078, "rewards_train/margins_1": 2.1016650199890137, "rewards_train/margins_2": 2.4926819801330566, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -116.08075714111328, "logps_train/policy_1_l": -122.81453704833984, "logps_train/policy_1_w": -100.62250518798828, "logps_train/policy_2_2": -92.81826782226562, "logps_train/policy_2_w": -141.23660278320312, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 2.035674571990967, "rewards_train/1-l": -1.3619225025177002, "rewards_train/1-w": 3.704936981201172, "rewards_train/2-2": 2.812704563140869, "rewards_train/2-w": 1.7654016017913818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.066859483718872, "rewards_train/margins_1": 1.669262409210205, "rewards_train/margins_2": 1.0473029613494873, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -100.52633666992188, "logps_train/policy_1_l": -82.14893341064453, "logps_train/policy_1_w": -62.26259994506836, "logps_train/policy_2_2": -74.91967010498047, "logps_train/policy_2_w": -79.89421844482422, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -91.5, "rewards_train/1-2": 1.370803713798523, "rewards_train/1-l": -1.4340341091156006, "rewards_train/1-w": 1.9975680112838745, "rewards_train/2-2": 2.4908456802368164, "rewards_train/2-w": 1.1629219055175781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.431602120399475, "rewards_train/margins_1": 0.6267642974853516, "rewards_train/margins_2": 1.3279237747192383, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -151.1949005126953, "logps_train/policy_1_l": -139.30831909179688, "logps_train/policy_1_w": -137.7638397216797, "logps_train/policy_2_2": -110.9665756225586, "logps_train/policy_2_w": -173.93553161621094, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.1258225440979004, "rewards_train/1-l": -2.016770362854004, "rewards_train/1-w": 3.2064290046691895, "rewards_train/2-2": 3.6064677238464355, "rewards_train/2-w": 1.8345720767974854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.223199367523193, "rewards_train/margins_1": 1.080606460571289, "rewards_train/margins_2": 1.7718956470489502, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -124.43919372558594, "logps_train/policy_1_l": -127.49776458740234, "logps_train/policy_1_w": -112.53768920898438, "logps_train/policy_2_2": -93.16149139404297, "logps_train/policy_2_w": -146.57872009277344, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.315455436706543, "rewards_train/1-l": -1.4040732383728027, "rewards_train/1-w": 2.819669246673584, "rewards_train/2-2": 2.5471324920654297, "rewards_train/2-w": 1.3515028953552246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.223742485046387, "rewards_train/margins_1": 1.504213809967041, "rewards_train/margins_2": 1.195629596710205, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -80.2224349975586, "logps_train/policy_1_l": -113.51319885253906, "logps_train/policy_1_w": -76.27299499511719, "logps_train/policy_2_2": -55.53571319580078, "logps_train/policy_2_w": -139.82171630859375, "logps_train/ref_1_2": -92.5, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2480695247650146, "rewards_train/1-l": -1.912257432937622, "rewards_train/1-w": 3.022700786590576, "rewards_train/2-2": 2.151115894317627, "rewards_train/2-w": 0.8334544897079468, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.934958219528198, "rewards_train/margins_1": 1.7746312618255615, "rewards_train/margins_2": 1.3176614046096802, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -141.11424255371094, "logps_train/policy_1_l": -97.0833740234375, "logps_train/policy_1_w": -107.12811279296875, "logps_train/policy_2_2": -107.0318374633789, "logps_train/policy_2_w": -136.45431518554688, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.5022480487823486, "rewards_train/1-l": -1.687438726425171, "rewards_train/1-w": 2.6405086517333984, "rewards_train/2-2": 3.1253321170806885, "rewards_train/2-w": 1.4287874698638916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.327947378158569, "rewards_train/margins_1": 1.1382606029510498, "rewards_train/margins_2": 1.6965446472167969, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -140.5440673828125, "logps_train/policy_1_l": -140.77481079101562, "logps_train/policy_1_w": -114.65799713134766, "logps_train/policy_2_2": -103.35210418701172, "logps_train/policy_2_w": -157.90328979492188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.1627802848815918, "rewards_train/1-l": -2.4858784675598145, "rewards_train/1-w": 3.4076385498046875, "rewards_train/2-2": 2.714008331298828, "rewards_train/2-w": 1.5924830436706543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.893517017364502, "rewards_train/margins_1": 2.2448582649230957, "rewards_train/margins_2": 1.1215252876281738, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -196.0223388671875, "logps_train/policy_1_l": -221.5209503173828, "logps_train/policy_1_w": -147.40219116210938, "logps_train/policy_2_2": -155.20445251464844, "logps_train/policy_2_w": -170.7776641845703, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.2883903980255127, "rewards_train/1-l": -2.6020960807800293, "rewards_train/1-w": 3.497281551361084, "rewards_train/2-2": 3.9795546531677246, "rewards_train/2-w": 2.4753589630126953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.099377632141113, "rewards_train/margins_1": 1.2088911533355713, "rewards_train/margins_2": 1.5041956901550293, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -148.76837158203125, "logps_train/policy_1_l": -141.88710021972656, "logps_train/policy_1_w": -138.62466430664062, "logps_train/policy_2_2": -111.14940643310547, "logps_train/policy_2_w": -183.02659606933594, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.9083185195922852, "rewards_train/1-l": -2.0920305252075195, "rewards_train/1-w": 4.532845973968506, "rewards_train/2-2": 1.9911142587661743, "rewards_train/2-w": 2.3207781314849854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.624876499176025, "rewards_train/margins_1": 3.6245274543762207, "rewards_train/margins_2": -0.32966387271881104, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -168.78439331054688, "logps_train/policy_1_l": -186.49427795410156, "logps_train/policy_1_w": -125.10719299316406, "logps_train/policy_2_2": -137.76840209960938, "logps_train/policy_2_w": -181.57791137695312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.1528093814849854, "rewards_train/1-l": -1.9065566062927246, "rewards_train/1-w": 2.552269220352173, "rewards_train/2-2": 2.4528470039367676, "rewards_train/2-w": 0.5656470060348511, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4588258266448975, "rewards_train/margins_1": 1.3994598388671875, "rewards_train/margins_2": 1.8871999979019165, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -113.01412963867188, "logps_train/policy_1_l": -170.08621215820312, "logps_train/policy_1_w": -120.8980712890625, "logps_train/policy_2_2": -81.94259643554688, "logps_train/policy_2_w": -170.9893798828125, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.729446291923523, "rewards_train/1-l": -3.281205177307129, "rewards_train/1-w": 2.35550594329834, "rewards_train/2-2": 1.9797636270523071, "rewards_train/2-w": 0.14403122663497925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.636711120605469, "rewards_train/margins_1": 1.626059651374817, "rewards_train/margins_2": 1.8357324004173279, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -191.8181915283203, "logps_train/policy_1_l": -212.42764282226562, "logps_train/policy_1_w": -122.96224975585938, "logps_train/policy_2_2": -148.04916381835938, "logps_train/policy_2_w": -170.34295654296875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.5681805610656738, "rewards_train/1-l": -2.894326686859131, "rewards_train/1-w": 3.4069008827209473, "rewards_train/2-2": 3.276334285736084, "rewards_train/2-w": 1.815704107284546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.301227569580078, "rewards_train/margins_1": 1.8387203216552734, "rewards_train/margins_2": 1.460630178451538, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -126.78477478027344, "logps_train/policy_1_l": -177.2943878173828, "logps_train/policy_1_w": -120.2138900756836, "logps_train/policy_2_2": -102.30408477783203, "logps_train/policy_2_w": -148.76280212402344, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.2773829698562622, "rewards_train/1-l": -1.8442816734313965, "rewards_train/1-w": 3.109861373901367, "rewards_train/2-2": 2.2109973430633545, "rewards_train/2-w": 1.8830945491790771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.954143047332764, "rewards_train/margins_1": 1.832478404045105, "rewards_train/margins_2": 0.32790279388427734, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -146.74244689941406, "logps_train/policy_1_l": -164.4327392578125, "logps_train/policy_1_w": -153.86520385742188, "logps_train/policy_2_2": -110.50811767578125, "logps_train/policy_2_w": -203.7990264892578, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.5366935729980469, "rewards_train/1-l": -1.7878044843673706, "rewards_train/1-w": 3.388284206390381, "rewards_train/2-2": 2.77418851852417, "rewards_train/2-w": 1.407010555267334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1760886907577515, "rewards_train/margins_1": 1.851590633392334, "rewards_train/margins_2": 1.367177963256836, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -122.16935729980469, "logps_train/policy_1_l": -145.2207489013672, "logps_train/policy_1_w": -133.28457641601562, "logps_train/policy_2_2": -92.39019775390625, "logps_train/policy_2_w": -171.8721923828125, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.5455641746520996, "rewards_train/1-l": -2.0627002716064453, "rewards_train/1-w": 2.4959568977355957, "rewards_train/2-2": 1.9437928199768066, "rewards_train/2-w": 1.0432507991790771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.558657169342041, "rewards_train/margins_1": 1.950392723083496, "rewards_train/margins_2": 0.9005420207977295, "step": 509 }, { "epoch": 1.53, "learning_rate": 7.409866592687768e-07, "loss": 0.4539, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -152.8358612060547, "logps_train/policy_1_l": -169.32232666015625, "logps_train/policy_1_w": -167.80050659179688, "logps_train/policy_2_2": -109.8343505859375, "logps_train/policy_2_w": -223.9302978515625, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.3500080108642578, "rewards_train/1-l": -1.7456127405166626, "rewards_train/1-w": 3.4797139167785645, "rewards_train/2-2": 2.7462520599365234, "rewards_train/2-w": 0.9757214784622192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.225326657295227, "rewards_train/margins_1": 2.1297059059143066, "rewards_train/margins_2": 1.7705305814743042, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -122.7413101196289, "logps_train/policy_1_l": -90.31106567382812, "logps_train/policy_1_w": -86.49940490722656, "logps_train/policy_2_2": -94.47288513183594, "logps_train/policy_2_w": -114.05706024169922, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.7633689641952515, "rewards_train/1-l": -1.388925552368164, "rewards_train/1-w": 2.649278163909912, "rewards_train/2-2": 2.9355239868164062, "rewards_train/2-w": 1.5099194049835205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.038203716278076, "rewards_train/margins_1": 0.8859091997146606, "rewards_train/margins_2": 1.4256045818328857, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -217.178955078125, "logps_train/policy_1_l": -325.9159851074219, "logps_train/policy_1_w": -195.77488708496094, "logps_train/policy_2_2": -171.06349182128906, "logps_train/policy_2_w": -250.63143920898438, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -282.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.8563225269317627, "rewards_train/1-l": -4.401364326477051, "rewards_train/1-w": 3.9412615299224854, "rewards_train/2-2": 3.3975577354431152, "rewards_train/2-w": 1.8306063413619995, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 8.342625856399536, "rewards_train/margins_1": 2.0849390029907227, "rewards_train/margins_2": 1.5669513940811157, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -210.14654541015625, "logps_train/policy_1_l": -162.69091796875, "logps_train/policy_1_w": -92.57838439941406, "logps_train/policy_2_2": -164.01206970214844, "logps_train/policy_2_w": -119.58494567871094, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.4189398288726807, "rewards_train/1-l": -3.1542489528656006, "rewards_train/1-w": 2.976536512374878, "rewards_train/2-2": 3.7287731170654297, "rewards_train/2-w": 2.0008814334869385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1307854652404785, "rewards_train/margins_1": 1.5575966835021973, "rewards_train/margins_2": 1.7278916835784912, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -71.10224151611328, "logps_train/policy_1_l": -33.06072998046875, "logps_train/policy_1_w": -45.615455627441406, "logps_train/policy_2_2": -51.67381286621094, "logps_train/policy_2_w": -69.82179260253906, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -22.5, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 1.0501761436462402, "rewards_train/1-l": -1.0495790243148804, "rewards_train/1-w": 2.273855209350586, "rewards_train/2-2": 2.058497667312622, "rewards_train/2-w": 1.0022928714752197, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3234342336654663, "rewards_train/margins_1": 1.2236790657043457, "rewards_train/margins_2": 1.0562047958374023, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -159.06759643554688, "logps_train/policy_1_l": -185.72488403320312, "logps_train/policy_1_w": -121.58660888671875, "logps_train/policy_2_2": -118.74516296386719, "logps_train/policy_2_w": -175.92745971679688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.765211820602417, "rewards_train/1-l": -0.8736599683761597, "rewards_train/1-w": 3.0088186264038086, "rewards_train/2-2": 2.523921012878418, "rewards_train/2-w": 0.4240517318248749, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8824785947799683, "rewards_train/margins_1": 2.2436068058013916, "rewards_train/margins_2": 2.099869281053543, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -79.78751373291016, "logps_train/policy_1_l": -104.42060852050781, "logps_train/policy_1_w": -71.07464599609375, "logps_train/policy_2_2": -53.74692153930664, "logps_train/policy_2_w": -107.5927734375, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -70.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 1.0462486743927002, "rewards_train/1-l": -2.0221383571624756, "rewards_train/1-w": 2.4128482341766357, "rewards_train/2-2": 1.6784329414367676, "rewards_train/2-w": 0.7172845005989075, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.434986591339111, "rewards_train/margins_1": 1.3665995597839355, "rewards_train/margins_2": 0.9611484408378601, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -206.9840850830078, "logps_train/policy_1_l": -293.9511413574219, "logps_train/policy_1_w": -156.99053955078125, "logps_train/policy_2_2": -149.47048950195312, "logps_train/policy_2_w": -226.39273071289062, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 1.1890918016433716, "rewards_train/1-l": -3.385739803314209, "rewards_train/1-w": 3.850945472717285, "rewards_train/2-2": 3.052952289581299, "rewards_train/2-w": 1.3044761419296265, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.236685276031494, "rewards_train/margins_1": 2.6618536710739136, "rewards_train/margins_2": 1.7484761476516724, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -137.0026092529297, "logps_train/policy_1_l": -168.3292236328125, "logps_train/policy_1_w": -150.2350311279297, "logps_train/policy_2_2": -107.47206115722656, "logps_train/policy_2_w": -186.55264282226562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7028634548187256, "rewards_train/1-l": -2.311633348464966, "rewards_train/1-w": 3.6639976501464844, "rewards_train/2-2": 3.0262320041656494, "rewards_train/2-w": 1.7564537525177002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.97563099861145, "rewards_train/margins_1": 1.9611341953277588, "rewards_train/margins_2": 1.2697782516479492, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -105.95863342285156, "logps_train/policy_1_l": -109.33699798583984, "logps_train/policy_1_w": -110.71018981933594, "logps_train/policy_2_2": -88.638671875, "logps_train/policy_2_w": -149.15386962890625, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.567613124847412, "rewards_train/1-l": -1.5631916522979736, "rewards_train/1-w": 3.0406999588012695, "rewards_train/2-2": 2.4183602333068848, "rewards_train/2-w": 1.3172309398651123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.603891611099243, "rewards_train/margins_1": 1.4730868339538574, "rewards_train/margins_2": 1.1011292934417725, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -100.65696716308594, "logps_train/policy_1_l": -132.02587890625, "logps_train/policy_1_w": -105.52096557617188, "logps_train/policy_2_2": -76.40678405761719, "logps_train/policy_2_w": -139.59527587890625, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.8436781167984009, "rewards_train/1-l": -1.648876667022705, "rewards_train/1-w": 3.165091037750244, "rewards_train/2-2": 2.633539915084839, "rewards_train/2-w": 1.8842216730117798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.813967704772949, "rewards_train/margins_1": 1.3214129209518433, "rewards_train/margins_2": 0.7493182420730591, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -97.61941528320312, "logps_train/policy_1_l": -169.95228576660156, "logps_train/policy_1_w": -92.83668518066406, "logps_train/policy_2_2": -74.57915496826172, "logps_train/policy_2_w": -125.96354675292969, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.4474334716796875, "rewards_train/1-l": -2.2330706119537354, "rewards_train/1-w": 3.886644124984741, "rewards_train/2-2": 1.9663029909133911, "rewards_train/2-w": 2.5473949909210205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.119714736938477, "rewards_train/margins_1": 2.4392106533050537, "rewards_train/margins_2": -0.5810920000076294, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -109.52371215820312, "logps_train/policy_1_l": -109.77902221679688, "logps_train/policy_1_w": -72.90972900390625, "logps_train/policy_2_2": -96.98754119873047, "logps_train/policy_2_w": -87.4005126953125, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 1.39919114112854, "rewards_train/1-l": -2.164621353149414, "rewards_train/1-w": 2.4223082065582275, "rewards_train/2-2": 2.1114025115966797, "rewards_train/2-w": 1.8357303142547607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.586929559707642, "rewards_train/margins_1": 1.0231170654296875, "rewards_train/margins_2": 0.27567219734191895, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -131.94512939453125, "logps_train/policy_1_l": -163.93136596679688, "logps_train/policy_1_w": -124.47657012939453, "logps_train/policy_2_2": -94.58450317382812, "logps_train/policy_2_w": -181.69149780273438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.719550371170044, "rewards_train/1-l": -2.267550468444824, "rewards_train/1-w": 3.70546817779541, "rewards_train/2-2": 3.0602989196777344, "rewards_train/2-w": 1.5292880535125732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.973018646240234, "rewards_train/margins_1": 1.9859178066253662, "rewards_train/margins_2": 1.5310108661651611, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -100.38896942138672, "logps_train/policy_1_l": -162.30685424804688, "logps_train/policy_1_w": -82.59355926513672, "logps_train/policy_2_2": -77.65150451660156, "logps_train/policy_2_w": -102.20245361328125, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 0.8900092840194702, "rewards_train/1-l": -2.7576379776000977, "rewards_train/1-w": 2.093769073486328, "rewards_train/2-2": 1.9143418073654175, "rewards_train/2-w": 1.4266297817230225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.851407051086426, "rewards_train/margins_1": 1.203759789466858, "rewards_train/margins_2": 0.487712025642395, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -244.18740844726562, "logps_train/policy_1_l": -180.10400390625, "logps_train/policy_1_w": -88.96923065185547, "logps_train/policy_2_2": -191.76419067382812, "logps_train/policy_2_w": -127.90248107910156, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9453212022781372, "rewards_train/1-l": -2.1447763442993164, "rewards_train/1-w": 3.221827268600464, "rewards_train/2-2": 3.282954692840576, "rewards_train/2-w": 1.534751057624817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.36660361289978, "rewards_train/margins_1": 2.2765060663223267, "rewards_train/margins_2": 1.7482036352157593, "step": 511 }, { "epoch": 1.53, "learning_rate": 7.235225677961513e-07, "loss": 0.4216, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -70.10679626464844, "logps_train/policy_1_l": -85.46127319335938, "logps_train/policy_1_w": -60.44713592529297, "logps_train/policy_2_2": -57.03136444091797, "logps_train/policy_2_w": -78.04822540283203, "logps_train/ref_1_2": -77.5, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.7233041524887085, "rewards_train/1-l": -1.9980807304382324, "rewards_train/1-w": 2.407630443572998, "rewards_train/2-2": 1.3244024515151978, "rewards_train/2-w": 1.3889272212982178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.4057111740112305, "rewards_train/margins_1": 1.6843262910842896, "rewards_train/margins_2": -0.06452476978302002, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -209.99807739257812, "logps_train/policy_1_l": -206.41134643554688, "logps_train/policy_1_w": -138.37789916992188, "logps_train/policy_2_2": -163.4256591796875, "logps_train/policy_2_w": -188.9241485595703, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.7064433097839355, "rewards_train/1-l": -3.192307710647583, "rewards_train/1-w": 3.9079132080078125, "rewards_train/2-2": 3.8121204376220703, "rewards_train/2-w": 2.1587576866149902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.1002209186553955, "rewards_train/margins_1": 2.201469898223877, "rewards_train/margins_2": 1.65336275100708, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -154.60147094726562, "logps_train/policy_1_l": -170.2398681640625, "logps_train/policy_1_w": -144.33961486816406, "logps_train/policy_2_2": -121.62751770019531, "logps_train/policy_2_w": -176.92098999023438, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.14432719349861145, "rewards_train/1-l": -2.692542791366577, "rewards_train/1-w": 3.8035383224487305, "rewards_train/2-2": 1.7702556848526, "rewards_train/2-w": 2.070401430130005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.496081113815308, "rewards_train/margins_1": 3.947865515947342, "rewards_train/margins_2": -0.3001457452774048, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -206.25213623046875, "logps_train/policy_1_l": -372.4811706542969, "logps_train/policy_1_w": -129.25607299804688, "logps_train/policy_2_2": -157.70396423339844, "logps_train/policy_2_w": -173.68499755859375, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -326.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.235724449157715, "rewards_train/1-l": -4.664132118225098, "rewards_train/1-w": 3.457204818725586, "rewards_train/2-2": 3.8206193447113037, "rewards_train/2-w": 1.8502496480941772, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.121336936950684, "rewards_train/margins_1": 1.221480369567871, "rewards_train/margins_2": 1.9703696966171265, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -173.4109649658203, "logps_train/policy_1_l": -176.2603759765625, "logps_train/policy_1_w": -154.72479248046875, "logps_train/policy_2_2": -133.8577423095703, "logps_train/policy_2_w": -190.2761688232422, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.9135911464691162, "rewards_train/1-l": -1.5387818813323975, "rewards_train/1-w": 3.2447094917297363, "rewards_train/2-2": 3.5911781787872314, "rewards_train/2-w": 1.7755084037780762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.783491373062134, "rewards_train/margins_1": 1.3311183452606201, "rewards_train/margins_2": 1.8156697750091553, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -195.2763671875, "logps_train/policy_1_l": -221.85874938964844, "logps_train/policy_1_w": -258.06103515625, "logps_train/policy_2_2": -144.41184997558594, "logps_train/policy_2_w": -344.34027099609375, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -304.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -352.0, "rewards_train/1-2": 1.9786126613616943, "rewards_train/1-l": -2.973374843597412, "rewards_train/1-w": 4.518896102905273, "rewards_train/2-2": 3.618189811706543, "rewards_train/2-w": 0.7245657444000244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.4922709465026855, "rewards_train/margins_1": 2.540283441543579, "rewards_train/margins_2": 2.8936240673065186, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -126.11363220214844, "logps_train/policy_1_l": -117.3385238647461, "logps_train/policy_1_w": -129.66409301757812, "logps_train/policy_2_2": -97.84278106689453, "logps_train/policy_2_w": -173.7210693359375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.5636370182037354, "rewards_train/1-l": -1.7397117614746094, "rewards_train/1-w": 3.171090602874756, "rewards_train/2-2": 2.454784393310547, "rewards_train/2-w": 1.5528926849365234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.910802364349365, "rewards_train/margins_1": 1.6074535846710205, "rewards_train/margins_2": 0.9018917083740234, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -149.9572296142578, "logps_train/policy_1_l": -172.73690795898438, "logps_train/policy_1_w": -100.27162170410156, "logps_train/policy_2_2": -122.93392181396484, "logps_train/policy_2_w": -125.49736022949219, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.9558393955230713, "rewards_train/1-l": -2.668612480163574, "rewards_train/1-w": 2.9861185550689697, "rewards_train/2-2": 2.6847329139709473, "rewards_train/2-w": 1.7455769777297974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.654731035232544, "rewards_train/margins_1": 1.0302791595458984, "rewards_train/margins_2": 0.9391559362411499, "step": 512 }, { "epoch": 1.54, "logps_train/policy_1_2": -166.75563049316406, "logps_train/policy_1_l": -148.22042846679688, "logps_train/policy_1_w": -140.03570556640625, "logps_train/policy_2_2": -124.96435546875, "logps_train/policy_2_w": -182.95806884765625, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.623070240020752, "rewards_train/1-l": -2.0087623596191406, "rewards_train/1-w": 3.6257266998291016, "rewards_train/2-2": 3.3086419105529785, "rewards_train/2-w": 2.11317777633667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.634489059448242, "rewards_train/margins_1": 2.0026564598083496, "rewards_train/margins_2": 1.1954641342163086, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -247.82212829589844, "logps_train/policy_1_l": -165.3625030517578, "logps_train/policy_1_w": -140.82269287109375, "logps_train/policy_2_2": -205.42401123046875, "logps_train/policy_2_w": -184.8523712158203, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7959116697311401, "rewards_train/1-l": -1.9270710945129395, "rewards_train/1-w": 3.697418689727783, "rewards_train/2-2": 3.409160852432251, "rewards_train/2-w": 2.4928884506225586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.624489784240723, "rewards_train/margins_1": 1.901507019996643, "rewards_train/margins_2": 0.9162724018096924, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -127.9378662109375, "logps_train/policy_1_l": -155.48411560058594, "logps_train/policy_1_w": -62.08146286010742, "logps_train/policy_2_2": -98.27958679199219, "logps_train/policy_2_w": -92.42975616455078, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.3859014511108398, "rewards_train/1-l": -2.6097397804260254, "rewards_train/1-w": 2.0992753505706787, "rewards_train/2-2": 2.4439167976379395, "rewards_train/2-w": 0.8476498126983643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.709015130996704, "rewards_train/margins_1": 0.7133738994598389, "rewards_train/margins_2": 1.5962669849395752, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -232.78692626953125, "logps_train/policy_1_l": -251.03256225585938, "logps_train/policy_1_w": -207.24317932128906, "logps_train/policy_2_2": -200.2215576171875, "logps_train/policy_2_w": -257.217529296875, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -249.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.230681896209717, "rewards_train/1-l": -2.906967878341675, "rewards_train/1-w": 4.156932830810547, "rewards_train/2-2": 3.39346981048584, "rewards_train/2-w": 1.8782492876052856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.063900709152222, "rewards_train/margins_1": 1.92625093460083, "rewards_train/margins_2": 1.5152205228805542, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -176.4007568359375, "logps_train/policy_1_l": -154.5355682373047, "logps_train/policy_1_w": -127.6834716796875, "logps_train/policy_2_2": -126.19206237792969, "logps_train/policy_2_w": -162.4530792236328, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.0349245071411133, "rewards_train/1-l": -1.2147444486618042, "rewards_train/1-w": 2.920074701309204, "rewards_train/2-2": 3.6214187145233154, "rewards_train/2-w": 2.2929208278656006, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.134819149971008, "rewards_train/margins_1": 0.8851501941680908, "rewards_train/margins_2": 1.3284978866577148, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -209.73379516601562, "logps_train/policy_1_l": -212.71693420410156, "logps_train/policy_1_w": -230.36578369140625, "logps_train/policy_2_2": -166.31201171875, "logps_train/policy_2_w": -298.77935791015625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -280.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 1.845369815826416, "rewards_train/1-l": -2.0466933250427246, "rewards_train/1-w": 4.963421821594238, "rewards_train/2-2": 3.4031736850738525, "rewards_train/2-w": 0.8486284613609314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.010115146636963, "rewards_train/margins_1": 3.1180520057678223, "rewards_train/margins_2": 2.554545223712921, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -136.824462890625, "logps_train/policy_1_l": -145.39291381835938, "logps_train/policy_1_w": -103.98243713378906, "logps_train/policy_2_2": -101.47608184814453, "logps_train/policy_2_w": -145.0444793701172, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.0316162109375, "rewards_train/1-l": -1.7773776054382324, "rewards_train/1-w": 2.4265613555908203, "rewards_train/2-2": 2.7014153003692627, "rewards_train/2-w": 0.6203564405441284, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.203938961029053, "rewards_train/margins_1": 1.3949451446533203, "rewards_train/margins_2": 2.0810588598251343, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -170.67958068847656, "logps_train/policy_1_l": -234.36376953125, "logps_train/policy_1_w": -191.06173706054688, "logps_train/policy_2_2": -147.10092163085938, "logps_train/policy_2_w": -218.3665008544922, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 2.242588758468628, "rewards_train/1-l": -2.690697193145752, "rewards_train/1-w": 3.15437388420105, "rewards_train/2-2": 3.304360866546631, "rewards_train/2-w": 2.081317901611328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.845071077346802, "rewards_train/margins_1": 0.9117851257324219, "rewards_train/margins_2": 1.2230429649353027, "step": 513 }, { "epoch": 1.54, "learning_rate": 7.062318573891716e-07, "loss": 0.3913, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -199.55569458007812, "logps_train/policy_1_l": -157.8453369140625, "logps_train/policy_1_w": -161.2822723388672, "logps_train/policy_2_2": -153.31112670898438, "logps_train/policy_2_w": -206.63038635253906, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.1147431135177612, "rewards_train/1-l": -1.5970344543457031, "rewards_train/1-w": 3.2030234336853027, "rewards_train/2-2": 3.2454490661621094, "rewards_train/2-w": 1.368211269378662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.800057888031006, "rewards_train/margins_1": 2.0882803201675415, "rewards_train/margins_2": 1.8772377967834473, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -219.21197509765625, "logps_train/policy_1_l": -232.2645721435547, "logps_train/policy_1_w": -185.00918579101562, "logps_train/policy_2_2": -164.61212158203125, "logps_train/policy_2_w": -250.3076171875, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.4561476707458496, "rewards_train/1-l": -2.5986242294311523, "rewards_train/1-w": 4.130331039428711, "rewards_train/2-2": 4.332536697387695, "rewards_train/2-w": 1.5551772117614746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.728955268859863, "rewards_train/margins_1": 1.6741833686828613, "rewards_train/margins_2": 2.7773594856262207, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -45.2186393737793, "logps_train/policy_1_l": -79.72470092773438, "logps_train/policy_1_w": -59.86564254760742, "logps_train/policy_2_2": -32.9904670715332, "logps_train/policy_2_w": -74.15702056884766, "logps_train/ref_1_2": -60.25, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -52.5, "logps_train/ref_2_w": -91.5, "rewards_train/1-2": 1.4968857765197754, "rewards_train/1-l": -1.8892675638198853, "rewards_train/1-w": 2.429060935974121, "rewards_train/2-2": 1.9323985576629639, "rewards_train/2-w": 1.7288289070129395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.318328499794006, "rewards_train/margins_1": 0.9321751594543457, "rewards_train/margins_2": 0.20356965065002441, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -217.7281494140625, "logps_train/policy_1_l": -263.12176513671875, "logps_train/policy_1_w": -186.6499786376953, "logps_train/policy_2_2": -171.77978515625, "logps_train/policy_2_w": -250.20904541015625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.3271851539611816, "rewards_train/1-l": -2.5434274673461914, "rewards_train/1-w": 4.547502517700195, "rewards_train/2-2": 3.897021532058716, "rewards_train/2-w": 2.2665958404541016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.090929985046387, "rewards_train/margins_1": 2.2203173637390137, "rewards_train/margins_2": 1.6304256916046143, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -116.60169219970703, "logps_train/policy_1_l": -184.04859924316406, "logps_train/policy_1_w": -107.20758056640625, "logps_train/policy_2_2": -78.91336059570312, "logps_train/policy_2_w": -147.9911651611328, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.8671740889549255, "rewards_train/1-l": -2.7698988914489746, "rewards_train/1-w": 2.8370537757873535, "rewards_train/2-2": 2.3664767742156982, "rewards_train/2-w": 1.3133827447891235, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.606952667236328, "rewards_train/margins_1": 1.969879686832428, "rewards_train/margins_2": 1.0530940294265747, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -141.19326782226562, "logps_train/policy_1_l": -134.8876190185547, "logps_train/policy_1_w": -128.36080932617188, "logps_train/policy_2_2": -112.80708312988281, "logps_train/policy_2_w": -160.46044921875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.0666115283966064, "rewards_train/1-l": -2.1075124740600586, "rewards_train/1-w": 2.8779821395874023, "rewards_train/2-2": 2.341167449951172, "rewards_train/2-w": 1.6305179595947266, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.985494613647461, "rewards_train/margins_1": 1.811370611190796, "rewards_train/margins_2": 0.7106494903564453, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -182.7330780029297, "logps_train/policy_1_l": -191.5623779296875, "logps_train/policy_1_w": -105.87464904785156, "logps_train/policy_2_2": -152.3112030029297, "logps_train/policy_2_w": -135.83676147460938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 2.5040364265441895, "rewards_train/1-l": -2.1847524642944336, "rewards_train/1-w": 3.546910285949707, "rewards_train/2-2": 3.7345046997070312, "rewards_train/2-w": 2.5366363525390625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.731662750244141, "rewards_train/margins_1": 1.0428738594055176, "rewards_train/margins_2": 1.1978683471679688, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -110.24400329589844, "logps_train/policy_1_l": -128.87823486328125, "logps_train/policy_1_w": -89.9914779663086, "logps_train/policy_2_2": -82.22787475585938, "logps_train/policy_2_w": -124.94331359863281, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.325599193572998, "rewards_train/1-l": -2.947979211807251, "rewards_train/1-w": 3.3071024417877197, "rewards_train/2-2": 2.6022119522094727, "rewards_train/2-w": 1.6931686401367188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.255081653594971, "rewards_train/margins_1": 1.9815032482147217, "rewards_train/margins_2": 0.9090433120727539, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -132.6556854248047, "logps_train/policy_1_l": -129.9641876220703, "logps_train/policy_1_w": -92.0721206665039, "logps_train/policy_2_2": -104.0313720703125, "logps_train/policy_2_w": -125.16905212402344, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.3469310998916626, "rewards_train/1-l": -1.9272781610488892, "rewards_train/1-w": 2.491225242614746, "rewards_train/2-2": 2.5361204147338867, "rewards_train/2-w": 1.4567267894744873, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.418503403663635, "rewards_train/margins_1": 1.1442941427230835, "rewards_train/margins_2": 1.0793936252593994, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -80.75936126708984, "logps_train/policy_1_l": -134.06158447265625, "logps_train/policy_1_w": -61.59835433959961, "logps_train/policy_2_2": -55.79694366455078, "logps_train/policy_2_w": -93.3329086303711, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": 1.091251015663147, "rewards_train/1-l": -2.3563528060913086, "rewards_train/1-w": 2.271023750305176, "rewards_train/2-2": 2.06718111038208, "rewards_train/2-w": 1.197959065437317, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.627376556396484, "rewards_train/margins_1": 1.1797727346420288, "rewards_train/margins_2": 0.8692220449447632, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -104.39726257324219, "logps_train/policy_1_l": -107.89391326904297, "logps_train/policy_1_w": -62.26230239868164, "logps_train/policy_2_2": -73.04698181152344, "logps_train/policy_2_w": -87.67737579345703, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.6633989810943604, "rewards_train/1-l": -2.272057056427002, "rewards_train/1-w": 2.0081448554992676, "rewards_train/2-2": 1.8331925868988037, "rewards_train/2-w": 1.2931997776031494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2802019119262695, "rewards_train/margins_1": 1.3447458744049072, "rewards_train/margins_2": 0.5399928092956543, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -184.01588439941406, "logps_train/policy_1_l": -118.43985748291016, "logps_train/policy_1_w": -146.160400390625, "logps_train/policy_2_2": -143.33746337890625, "logps_train/policy_2_w": -182.57888793945312, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.506224513053894, "rewards_train/1-l": -1.2525789737701416, "rewards_train/1-w": 3.4777097702026367, "rewards_train/2-2": 3.1146907806396484, "rewards_train/2-w": 2.0983614921569824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.730288743972778, "rewards_train/margins_1": 1.9714852571487427, "rewards_train/margins_2": 1.016329288482666, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -115.39472961425781, "logps_train/policy_1_l": -173.81350708007812, "logps_train/policy_1_w": -114.65050506591797, "logps_train/policy_2_2": -88.96568298339844, "logps_train/policy_2_w": -140.93524169921875, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.8074017763137817, "rewards_train/1-l": -2.4325222969055176, "rewards_train/1-w": 3.291980743408203, "rewards_train/2-2": 2.429212808609009, "rewards_train/2-w": 2.004132032394409, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.724503040313721, "rewards_train/margins_1": 1.4845789670944214, "rewards_train/margins_2": 0.4250807762145996, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -159.76266479492188, "logps_train/policy_1_l": -117.54444885253906, "logps_train/policy_1_w": -106.49382019042969, "logps_train/policy_2_2": -115.27322387695312, "logps_train/policy_2_w": -152.91412353515625, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.7409212589263916, "rewards_train/1-l": -1.1765645742416382, "rewards_train/1-w": 2.8959310054779053, "rewards_train/2-2": 3.163302183151245, "rewards_train/2-w": 1.2437443733215332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0724955797195435, "rewards_train/margins_1": 1.1550097465515137, "rewards_train/margins_2": 1.919557809829712, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -194.78382873535156, "logps_train/policy_1_l": -166.50299072265625, "logps_train/policy_1_w": -87.58822631835938, "logps_train/policy_2_2": -148.0324249267578, "logps_train/policy_2_w": -111.94132995605469, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.7380236387252808, "rewards_train/1-l": -2.5682687759399414, "rewards_train/1-w": 2.652895927429199, "rewards_train/2-2": 3.1858201026916504, "rewards_train/2-w": 1.9043049812316895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.221164703369141, "rewards_train/margins_1": 1.9148722887039185, "rewards_train/margins_2": 1.281515121459961, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -138.81332397460938, "logps_train/policy_1_l": -150.96485900878906, "logps_train/policy_1_w": -86.25518035888672, "logps_train/policy_2_2": -104.07185363769531, "logps_train/policy_2_w": -122.26760864257812, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.6311664581298828, "rewards_train/1-l": -2.8652358055114746, "rewards_train/1-w": 2.6238961219787598, "rewards_train/2-2": 3.164689302444458, "rewards_train/2-w": 1.1849578619003296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.489131927490234, "rewards_train/margins_1": 0.992729663848877, "rewards_train/margins_2": 1.9797314405441284, "step": 515 }, { "epoch": 1.54, "learning_rate": 6.89116215590693e-07, "loss": 0.3678, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -150.55740356445312, "logps_train/policy_1_l": -119.5861587524414, "logps_train/policy_1_w": -115.335693359375, "logps_train/policy_2_2": -120.05072784423828, "logps_train/policy_2_w": -138.5919647216797, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.5926966667175293, "rewards_train/1-l": -1.8834205865859985, "rewards_train/1-w": 3.237133741378784, "rewards_train/2-2": 2.929302215576172, "rewards_train/2-w": 1.9915852546691895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.120554327964783, "rewards_train/margins_1": 1.6444370746612549, "rewards_train/margins_2": 0.9377169609069824, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -105.81089782714844, "logps_train/policy_1_l": -108.005859375, "logps_train/policy_1_w": -50.01138687133789, "logps_train/policy_2_2": -78.85906219482422, "logps_train/policy_2_w": -79.51821899414062, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -69.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 1.149378776550293, "rewards_train/1-l": -1.8947272300720215, "rewards_train/1-w": 1.9262049198150635, "rewards_train/2-2": 2.313312530517578, "rewards_train/2-w": 0.8134121298789978, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.820932149887085, "rewards_train/margins_1": 0.7768261432647705, "rewards_train/margins_2": 1.4999004006385803, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -194.51971435546875, "logps_train/policy_1_l": -218.2706298828125, "logps_train/policy_1_w": -155.6890869140625, "logps_train/policy_2_2": -127.93126678466797, "logps_train/policy_2_w": -235.2261962890625, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.0980277061462402, "rewards_train/1-l": -2.3520631790161133, "rewards_train/1-w": 4.130701065063477, "rewards_train/2-2": 3.8537487983703613, "rewards_train/2-w": 0.8808966875076294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.48276424407959, "rewards_train/margins_1": 3.0326733589172363, "rewards_train/margins_2": 2.972852110862732, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -176.72604370117188, "logps_train/policy_1_l": -173.6340789794922, "logps_train/policy_1_w": -201.55252075195312, "logps_train/policy_2_2": -131.91384887695312, "logps_train/policy_2_w": -266.1824951171875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -241.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 1.962160348892212, "rewards_train/1-l": -2.0145790576934814, "rewards_train/1-w": 3.9322476387023926, "rewards_train/2-2": 3.410569190979004, "rewards_train/2-w": 1.5380022525787354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.946826696395874, "rewards_train/margins_1": 1.9700872898101807, "rewards_train/margins_2": 1.8725669384002686, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -180.01913452148438, "logps_train/policy_1_l": -203.41787719726562, "logps_train/policy_1_w": -120.79219055175781, "logps_train/policy_2_2": -131.4767303466797, "logps_train/policy_2_w": -174.04428100585938, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.359023094177246, "rewards_train/1-l": -2.8285069465637207, "rewards_train/1-w": 3.1348438262939453, "rewards_train/2-2": 3.144515037536621, "rewards_train/2-w": 1.3346349000930786, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.963350772857666, "rewards_train/margins_1": 1.7758207321166992, "rewards_train/margins_2": 1.8098801374435425, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -240.6839599609375, "logps_train/policy_1_l": -205.69493103027344, "logps_train/policy_1_w": -163.94659423828125, "logps_train/policy_2_2": -193.11380004882812, "logps_train/policy_2_w": -203.97950744628906, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 2.1222290992736816, "rewards_train/1-l": -2.5085554122924805, "rewards_train/1-w": 3.9053406715393066, "rewards_train/2-2": 3.8647923469543457, "rewards_train/2-w": 2.2301740646362305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.413896083831787, "rewards_train/margins_1": 1.783111572265625, "rewards_train/margins_2": 1.6346182823181152, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -167.79132080078125, "logps_train/policy_1_l": -213.796142578125, "logps_train/policy_1_w": -115.40410614013672, "logps_train/policy_2_2": -122.19392395019531, "logps_train/policy_2_w": -161.3016357421875, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.9634464979171753, "rewards_train/1-l": -4.1215081214904785, "rewards_train/1-w": 2.8470888137817383, "rewards_train/2-2": 2.868913173675537, "rewards_train/2-w": 0.9190551042556763, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.968596935272217, "rewards_train/margins_1": 1.883642315864563, "rewards_train/margins_2": 1.9498580694198608, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -201.1392822265625, "logps_train/policy_1_l": -313.64117431640625, "logps_train/policy_1_w": -148.8881072998047, "logps_train/policy_2_2": -153.21554565429688, "logps_train/policy_2_w": -198.30941772460938, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.161071538925171, "rewards_train/1-l": -4.193022727966309, "rewards_train/1-w": 3.8174397945404053, "rewards_train/2-2": 3.584695816040039, "rewards_train/2-w": 1.9003080129623413, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.010462522506714, "rewards_train/margins_1": 1.6563682556152344, "rewards_train/margins_2": 1.6843878030776978, "step": 516 }, { "epoch": 1.55, "logps_train/policy_1_2": -265.98822021484375, "logps_train/policy_1_l": -204.41546630859375, "logps_train/policy_1_w": -145.06484985351562, "logps_train/policy_2_2": -199.00375366210938, "logps_train/policy_2_w": -188.322998046875, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.6261782646179199, "rewards_train/1-l": -2.6727962493896484, "rewards_train/1-w": 3.834138870239258, "rewards_train/2-2": 3.680875301361084, "rewards_train/2-w": 2.3880133628845215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.506935119628906, "rewards_train/margins_1": 3.207960605621338, "rewards_train/margins_2": 1.2928619384765625, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -125.66880798339844, "logps_train/policy_1_l": -93.20040130615234, "logps_train/policy_1_w": -123.66194915771484, "logps_train/policy_2_2": -89.76683044433594, "logps_train/policy_2_w": -174.31375122070312, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.2612444162368774, "rewards_train/1-l": -1.6762901544570923, "rewards_train/1-w": 3.1923987865448, "rewards_train/2-2": 2.945192337036133, "rewards_train/2-w": 1.0557340383529663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.868688941001892, "rewards_train/margins_1": 1.9311543703079224, "rewards_train/margins_2": 1.8894582986831665, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -106.55523681640625, "logps_train/policy_1_l": -104.2039566040039, "logps_train/policy_1_w": -106.26445007324219, "logps_train/policy_2_2": -77.34252166748047, "logps_train/policy_2_w": -134.1188201904297, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.9796327352523804, "rewards_train/1-l": -1.1584084033966064, "rewards_train/1-w": 2.121993064880371, "rewards_train/2-2": 2.2010996341705322, "rewards_train/2-w": 0.6599934101104736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2804014682769775, "rewards_train/margins_1": 1.1423603296279907, "rewards_train/margins_2": 1.5411062240600586, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -219.27330017089844, "logps_train/policy_1_l": -223.01535034179688, "logps_train/policy_1_w": -187.43716430664062, "logps_train/policy_2_2": -185.91574096679688, "logps_train/policy_2_w": -217.8472137451172, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 2.897669792175293, "rewards_train/1-l": -1.979659080505371, "rewards_train/1-w": 4.443783283233643, "rewards_train/2-2": 4.070926189422607, "rewards_train/2-w": 3.2027783393859863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.423442363739014, "rewards_train/margins_1": 1.5461134910583496, "rewards_train/margins_2": 0.8681478500366211, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -110.88508605957031, "logps_train/policy_1_l": -119.31090545654297, "logps_train/policy_1_w": -102.95050811767578, "logps_train/policy_2_2": -83.25450134277344, "logps_train/policy_2_w": -138.9990692138672, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.2294597625732422, "rewards_train/1-l": -1.7255237102508545, "rewards_train/1-w": 2.8801441192626953, "rewards_train/2-2": 2.1690807342529297, "rewards_train/2-w": 1.6739212274551392, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.60566782951355, "rewards_train/margins_1": 1.6506843566894531, "rewards_train/margins_2": 0.4951595067977905, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -198.95199584960938, "logps_train/policy_1_l": -250.39407348632812, "logps_train/policy_1_w": -216.6436004638672, "logps_train/policy_2_2": -156.82040405273438, "logps_train/policy_2_w": -273.17755126953125, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -262.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -292.0, "rewards_train/1-2": 1.673551082611084, "rewards_train/1-l": -3.2237815856933594, "rewards_train/1-w": 4.457514762878418, "rewards_train/2-2": 3.2960846424102783, "rewards_train/2-w": 1.741621971130371, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.681296348571777, "rewards_train/margins_1": 2.783963680267334, "rewards_train/margins_2": 1.5544626712799072, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -186.6971435546875, "logps_train/policy_1_l": -135.4009552001953, "logps_train/policy_1_w": -68.01934814453125, "logps_train/policy_2_2": -148.9217071533203, "logps_train/policy_2_w": -93.88797760009766, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.763098955154419, "rewards_train/1-l": -2.642634630203247, "rewards_train/1-w": 2.501971483230591, "rewards_train/2-2": 2.914860725402832, "rewards_train/2-w": 1.7002646923065186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.144606113433838, "rewards_train/margins_1": 1.7388725280761719, "rewards_train/margins_2": 1.2145960330963135, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -158.9343719482422, "logps_train/policy_1_l": -190.96636962890625, "logps_train/policy_1_w": -187.5985107421875, "logps_train/policy_2_2": -123.77195739746094, "logps_train/policy_2_w": -240.20989990234375, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -231.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.9003126621246338, "rewards_train/1-l": -3.1560111045837402, "rewards_train/1-w": 4.296399116516113, "rewards_train/2-2": 3.2751479148864746, "rewards_train/2-w": 1.0571354627609253, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.4524102210998535, "rewards_train/margins_1": 2.3960864543914795, "rewards_train/margins_2": 2.2180124521255493, "step": 517 }, { "epoch": 1.55, "learning_rate": 6.721773128571812e-07, "loss": 0.3447, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -159.73460388183594, "logps_train/policy_1_l": -154.13088989257812, "logps_train/policy_1_w": -77.43590545654297, "logps_train/policy_2_2": -121.45820617675781, "logps_train/policy_2_w": -105.29437255859375, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.1609139442443848, "rewards_train/1-l": -2.3970727920532227, "rewards_train/1-w": 2.6821906566619873, "rewards_train/2-2": 2.868241310119629, "rewards_train/2-w": 1.7393136024475098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.07926344871521, "rewards_train/margins_1": 1.5212767124176025, "rewards_train/margins_2": 1.1289277076721191, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -139.7808380126953, "logps_train/policy_1_l": -124.01409912109375, "logps_train/policy_1_w": -114.13755798339844, "logps_train/policy_2_2": -104.03218078613281, "logps_train/policy_2_w": -159.9827117919922, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5469156503677368, "rewards_train/1-l": -1.4426209926605225, "rewards_train/1-w": 2.9171035289764404, "rewards_train/2-2": 2.9910202026367188, "rewards_train/2-w": 0.8173543214797974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.359724521636963, "rewards_train/margins_1": 1.3701878786087036, "rewards_train/margins_2": 2.1736658811569214, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -129.43409729003906, "logps_train/policy_1_l": -178.24478149414062, "logps_train/policy_1_w": -139.05564880371094, "logps_train/policy_2_2": -97.25575256347656, "logps_train/policy_2_w": -168.9380645751953, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.239403247833252, "rewards_train/1-l": -2.698307514190674, "rewards_train/1-w": 2.7280282974243164, "rewards_train/2-2": 2.532628059387207, "rewards_train/2-w": 1.5843181610107422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.42633581161499, "rewards_train/margins_1": 1.4886250495910645, "rewards_train/margins_2": 0.9483098983764648, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -112.0717544555664, "logps_train/policy_1_l": -129.66184997558594, "logps_train/policy_1_w": -134.00765991210938, "logps_train/policy_2_2": -86.39970397949219, "logps_train/policy_2_w": -172.24964904785156, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8240742683410645, "rewards_train/1-l": -1.7802473306655884, "rewards_train/1-w": 3.4242331981658936, "rewards_train/2-2": 2.6147170066833496, "rewards_train/2-w": 1.8094110488891602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.204480528831482, "rewards_train/margins_1": 1.600158929824829, "rewards_train/margins_2": 0.8053059577941895, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -105.72774505615234, "logps_train/policy_1_l": -115.74887084960938, "logps_train/policy_1_w": -69.35403442382812, "logps_train/policy_2_2": -80.63903045654297, "logps_train/policy_2_w": -84.84365844726562, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.2559852600097656, "rewards_train/1-l": -1.793050765991211, "rewards_train/1-w": 1.6253877878189087, "rewards_train/2-2": 2.200916051864624, "rewards_train/2-w": 1.0117042064666748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.4184385538101196, "rewards_train/margins_1": 0.36940252780914307, "rewards_train/margins_2": 1.1892118453979492, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -173.92144775390625, "logps_train/policy_1_l": -159.2378387451172, "logps_train/policy_1_w": -171.04519653320312, "logps_train/policy_2_2": -130.59559631347656, "logps_train/policy_2_w": -212.08554077148438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.3621041774749756, "rewards_train/1-l": -2.242973804473877, "rewards_train/1-w": 3.49035382270813, "rewards_train/2-2": 3.080430746078491, "rewards_train/2-w": 1.575185775756836, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.733327627182007, "rewards_train/margins_1": 2.1282496452331543, "rewards_train/margins_2": 1.5052449703216553, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -125.38340759277344, "logps_train/policy_1_l": -90.87262725830078, "logps_train/policy_1_w": -55.891845703125, "logps_train/policy_2_2": -85.85150146484375, "logps_train/policy_2_w": -87.1004638671875, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.3296282291412354, "rewards_train/1-l": -2.226325273513794, "rewards_train/1-w": 2.3092527389526367, "rewards_train/2-2": 2.688678026199341, "rewards_train/2-w": 1.0110467672348022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.535578012466431, "rewards_train/margins_1": 0.9796245098114014, "rewards_train/margins_2": 1.6776312589645386, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -164.99453735351562, "logps_train/policy_1_l": -202.88504028320312, "logps_train/policy_1_w": -85.02053833007812, "logps_train/policy_2_2": -124.818359375, "logps_train/policy_2_w": -108.67906188964844, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 0.8880454301834106, "rewards_train/1-l": -3.2724883556365967, "rewards_train/1-w": 2.098336935043335, "rewards_train/2-2": 2.4431636333465576, "rewards_train/2-w": 1.4000625610351562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.370825290679932, "rewards_train/margins_1": 1.2102915048599243, "rewards_train/margins_2": 1.0431010723114014, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -163.40426635742188, "logps_train/policy_1_l": -273.9799499511719, "logps_train/policy_1_w": -190.98573303222656, "logps_train/policy_2_2": -128.7137908935547, "logps_train/policy_2_w": -235.89703369140625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 1.9205102920532227, "rewards_train/1-l": -2.4683079719543457, "rewards_train/1-w": 3.2971296310424805, "rewards_train/2-2": 2.8325271606445312, "rewards_train/2-w": 1.6876404285430908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.765437602996826, "rewards_train/margins_1": 1.3766193389892578, "rewards_train/margins_2": 1.1448867321014404, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -145.1703338623047, "logps_train/policy_1_l": -187.0472412109375, "logps_train/policy_1_w": -131.24623107910156, "logps_train/policy_2_2": -107.47789001464844, "logps_train/policy_2_w": -162.9791259765625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.5610921382904053, "rewards_train/1-l": -2.5982789993286133, "rewards_train/1-w": 3.1433448791503906, "rewards_train/2-2": 3.0053353309631348, "rewards_train/2-w": 1.6530632972717285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.741623878479004, "rewards_train/margins_1": 1.5822527408599854, "rewards_train/margins_2": 1.3522720336914062, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -111.61568450927734, "logps_train/policy_1_l": -195.1752166748047, "logps_train/policy_1_w": -102.99668884277344, "logps_train/policy_2_2": -74.2410888671875, "logps_train/policy_2_w": -140.11245727539062, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.4290566444396973, "rewards_train/1-l": -3.270988941192627, "rewards_train/1-w": 3.036269187927246, "rewards_train/2-2": 2.6883914470672607, "rewards_train/2-w": 1.551255226135254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.307258129119873, "rewards_train/margins_1": 1.6072125434875488, "rewards_train/margins_2": 1.1371362209320068, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -125.96086120605469, "logps_train/policy_1_l": -101.43633270263672, "logps_train/policy_1_w": -92.71678161621094, "logps_train/policy_2_2": -99.18196105957031, "logps_train/policy_2_w": -120.41061401367188, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -90.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.7796959280967712, "rewards_train/1-l": -1.1096487045288086, "rewards_train/1-w": 2.155665874481201, "rewards_train/2-2": 1.8161795139312744, "rewards_train/2-w": 0.8749538064002991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2653145790100098, "rewards_train/margins_1": 1.37596994638443, "rewards_train/margins_2": 0.9412257075309753, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -162.104736328125, "logps_train/policy_1_l": -164.98587036132812, "logps_train/policy_1_w": -81.70475006103516, "logps_train/policy_2_2": -116.166015625, "logps_train/policy_2_w": -116.50285339355469, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.8371832370758057, "rewards_train/1-l": -2.1022017002105713, "rewards_train/1-w": 2.849837303161621, "rewards_train/2-2": 2.585742712020874, "rewards_train/2-w": 1.95596444606781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.952039003372192, "rewards_train/margins_1": 2.0126540660858154, "rewards_train/margins_2": 0.629778265953064, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -220.1162872314453, "logps_train/policy_1_l": -129.3245391845703, "logps_train/policy_1_w": -127.74480438232422, "logps_train/policy_2_2": -169.57534790039062, "logps_train/policy_2_w": -195.4172821044922, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.8633712530136108, "rewards_train/1-l": -1.204719066619873, "rewards_train/1-w": 3.0351881980895996, "rewards_train/2-2": 3.492464542388916, "rewards_train/2-w": 1.1894230842590332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.239907264709473, "rewards_train/margins_1": 2.1718169450759888, "rewards_train/margins_2": 2.303041458129883, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -237.86294555664062, "logps_train/policy_1_l": -263.158203125, "logps_train/policy_1_w": -166.3870849609375, "logps_train/policy_2_2": -187.6608123779297, "logps_train/policy_2_w": -209.19021606445312, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -225.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.72933030128479, "rewards_train/1-l": -2.548633575439453, "rewards_train/1-w": 3.780822992324829, "rewards_train/2-2": 3.7432937622070312, "rewards_train/2-w": 2.067697048187256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.329456567764282, "rewards_train/margins_1": 2.051492691040039, "rewards_train/margins_2": 1.6755967140197754, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -97.02896881103516, "logps_train/policy_1_l": -99.13479614257812, "logps_train/policy_1_w": -80.55673217773438, "logps_train/policy_2_2": -65.38167572021484, "logps_train/policy_2_w": -111.99604797363281, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.0205409526824951, "rewards_train/1-l": -1.7799978256225586, "rewards_train/1-w": 2.278200149536133, "rewards_train/2-2": 2.0899572372436523, "rewards_train/2-w": 0.508366584777832, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.058197975158691, "rewards_train/margins_1": 1.2576591968536377, "rewards_train/margins_2": 1.5815906524658203, "step": 519 }, { "epoch": 1.56, "learning_rate": 6.554168023956817e-07, "loss": 0.4317, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -157.93930053710938, "logps_train/policy_1_l": -193.97125244140625, "logps_train/policy_1_w": -159.43060302734375, "logps_train/policy_2_2": -126.26516723632812, "logps_train/policy_2_w": -207.06228637695312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.284196376800537, "rewards_train/1-l": -2.250640392303467, "rewards_train/1-w": 4.236626625061035, "rewards_train/2-2": 3.2703585624694824, "rewards_train/2-w": 2.3187713623046875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.487267017364502, "rewards_train/margins_1": 1.952430248260498, "rewards_train/margins_2": 0.9515872001647949, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -141.1826934814453, "logps_train/policy_1_l": -181.28842163085938, "logps_train/policy_1_w": -136.24710083007812, "logps_train/policy_2_2": -114.35507202148438, "logps_train/policy_2_w": -174.50274658203125, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.753605842590332, "rewards_train/1-l": -2.661263942718506, "rewards_train/1-w": 3.4725565910339355, "rewards_train/2-2": 2.911367416381836, "rewards_train/2-w": 1.5649586915969849, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.133820533752441, "rewards_train/margins_1": 1.7189507484436035, "rewards_train/margins_2": 1.346408724784851, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -129.74227905273438, "logps_train/policy_1_l": -159.78652954101562, "logps_train/policy_1_w": -123.75310516357422, "logps_train/policy_2_2": -108.93963623046875, "logps_train/policy_2_w": -156.330078125, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 2.2054600715637207, "rewards_train/1-l": -2.7275052070617676, "rewards_train/1-w": 3.60750150680542, "rewards_train/2-2": 2.6888487339019775, "rewards_train/2-w": 2.279491901397705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.3350067138671875, "rewards_train/margins_1": 1.4020414352416992, "rewards_train/margins_2": 0.40935683250427246, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -113.82386779785156, "logps_train/policy_1_l": -101.5511245727539, "logps_train/policy_1_w": -59.36016082763672, "logps_train/policy_2_2": -81.21636199951172, "logps_train/policy_2_w": -86.20496368408203, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.419175922870636, "rewards_train/1-l": -2.1136085987091064, "rewards_train/1-w": 1.7987496852874756, "rewards_train/2-2": 2.3351998329162598, "rewards_train/2-w": 0.8816521167755127, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.912358283996582, "rewards_train/margins_1": 1.3795737624168396, "rewards_train/margins_2": 1.453547716140747, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -124.44210815429688, "logps_train/policy_1_l": -113.11300659179688, "logps_train/policy_1_w": -84.97628021240234, "logps_train/policy_2_2": -106.38772583007812, "logps_train/policy_2_w": -105.43338012695312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.612039566040039, "rewards_train/1-l": -1.2590547800064087, "rewards_train/1-w": 2.081278085708618, "rewards_train/2-2": 2.3643527030944824, "rewards_train/2-w": 1.241037368774414, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.340332865715027, "rewards_train/margins_1": 0.4692385196685791, "rewards_train/margins_2": 1.1233153343200684, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -207.91802978515625, "logps_train/policy_1_l": -222.16455078125, "logps_train/policy_1_w": -210.25949096679688, "logps_train/policy_2_2": -151.492431640625, "logps_train/policy_2_w": -262.58343505859375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.3331959247589111, "rewards_train/1-l": -3.3527822494506836, "rewards_train/1-w": 4.058426380157471, "rewards_train/2-2": 4.0960693359375, "rewards_train/2-w": 1.836970567703247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.411208629608154, "rewards_train/margins_1": 2.7252304553985596, "rewards_train/margins_2": 2.259098768234253, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -247.27239990234375, "logps_train/policy_1_l": -269.67041015625, "logps_train/policy_1_w": -150.96339416503906, "logps_train/policy_2_2": -179.12893676757812, "logps_train/policy_2_w": -195.07492065429688, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 0.7899472117424011, "rewards_train/1-l": -3.9443857669830322, "rewards_train/1-w": 3.592625617980957, "rewards_train/2-2": 3.929293632507324, "rewards_train/2-w": 2.218290328979492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.537011384963989, "rewards_train/margins_1": 2.802678406238556, "rewards_train/margins_2": 1.711003303527832, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -225.84132385253906, "logps_train/policy_1_l": -226.6979522705078, "logps_train/policy_1_w": -180.93997192382812, "logps_train/policy_2_2": -181.92279052734375, "logps_train/policy_2_w": -217.9992218017578, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.154148817062378, "rewards_train/1-l": -2.1999950408935547, "rewards_train/1-w": 4.001314640045166, "rewards_train/2-2": 3.7295947074890137, "rewards_train/2-w": 2.517265558242798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.201309680938721, "rewards_train/margins_1": 1.847165822982788, "rewards_train/margins_2": 1.2123291492462158, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -237.69049072265625, "logps_train/policy_1_l": -241.720947265625, "logps_train/policy_1_w": -170.88526916503906, "logps_train/policy_2_2": -174.98696899414062, "logps_train/policy_2_w": -246.88839721679688, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.0122008323669434, "rewards_train/1-l": -2.0451416969299316, "rewards_train/1-w": 4.280223369598389, "rewards_train/2-2": 4.120053768157959, "rewards_train/2-w": 1.898659348487854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.32536506652832, "rewards_train/margins_1": 2.2680225372314453, "rewards_train/margins_2": 2.221394419670105, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -184.4315643310547, "logps_train/policy_1_l": -179.1659393310547, "logps_train/policy_1_w": -81.9034194946289, "logps_train/policy_2_2": -140.0943603515625, "logps_train/policy_2_w": -110.23797607421875, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.7802813053131104, "rewards_train/1-l": -2.2782630920410156, "rewards_train/1-w": 2.620596408843994, "rewards_train/2-2": 2.8052122592926025, "rewards_train/2-w": 1.69026517868042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.89885950088501, "rewards_train/margins_1": 1.8403151035308838, "rewards_train/margins_2": 1.1149470806121826, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -152.90084838867188, "logps_train/policy_1_l": -190.10537719726562, "logps_train/policy_1_w": -143.0757598876953, "logps_train/policy_2_2": -121.17171478271484, "logps_train/policy_2_w": -201.40789794921875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.2927277088165283, "rewards_train/1-l": -4.313467025756836, "rewards_train/1-w": 2.8517990112304688, "rewards_train/2-2": 3.6285314559936523, "rewards_train/2-w": 0.7295211553573608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.165266036987305, "rewards_train/margins_1": 0.5590713024139404, "rewards_train/margins_2": 2.8990103006362915, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -207.8108673095703, "logps_train/policy_1_l": -185.17556762695312, "logps_train/policy_1_w": -161.88693237304688, "logps_train/policy_2_2": -177.22415161132812, "logps_train/policy_2_w": -189.22715759277344, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.8439130783081055, "rewards_train/1-l": -1.771854043006897, "rewards_train/1-w": 3.453396797180176, "rewards_train/2-2": 3.801023483276367, "rewards_train/2-w": 2.662928581237793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.225250840187073, "rewards_train/margins_1": 0.6094837188720703, "rewards_train/margins_2": 1.1380949020385742, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -124.16969299316406, "logps_train/policy_1_l": -232.52638244628906, "logps_train/policy_1_w": -115.64830780029297, "logps_train/policy_2_2": -97.79590606689453, "logps_train/policy_2_w": -138.24676513671875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 2.045530319213867, "rewards_train/1-l": -2.8635754585266113, "rewards_train/1-w": 3.0907351970672607, "rewards_train/2-2": 3.1360342502593994, "rewards_train/2-w": 2.290996789932251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.954310655593872, "rewards_train/margins_1": 1.0452048778533936, "rewards_train/margins_2": 0.8450374603271484, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -165.28512573242188, "logps_train/policy_1_l": -146.14698791503906, "logps_train/policy_1_w": -87.22286987304688, "logps_train/policy_2_2": -117.263427734375, "logps_train/policy_2_w": -117.11744689941406, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.4980487525463104, "rewards_train/1-l": -2.60864520072937, "rewards_train/1-w": 2.48044753074646, "rewards_train/2-2": 2.636157512664795, "rewards_train/2-w": 1.264817237854004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.08909273147583, "rewards_train/margins_1": 1.9823987782001495, "rewards_train/margins_2": 1.371340274810791, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -224.43075561523438, "logps_train/policy_1_l": -185.2957763671875, "logps_train/policy_1_w": -127.02771759033203, "logps_train/policy_2_2": -170.30960083007812, "logps_train/policy_2_w": -163.37445068359375, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.622549295425415, "rewards_train/1-l": -1.6503658294677734, "rewards_train/1-w": 2.979259490966797, "rewards_train/2-2": 3.5948219299316406, "rewards_train/2-w": 1.1375558376312256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.62962532043457, "rewards_train/margins_1": 1.3567101955413818, "rewards_train/margins_2": 2.457266092300415, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -143.12152099609375, "logps_train/policy_1_l": -220.6380615234375, "logps_train/policy_1_w": -129.21261596679688, "logps_train/policy_2_2": -107.95396423339844, "logps_train/policy_2_w": -175.41348266601562, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8815983533859253, "rewards_train/1-l": -3.1575567722320557, "rewards_train/1-w": 3.1568641662597656, "rewards_train/2-2": 2.860854148864746, "rewards_train/2-w": 1.5524017810821533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.314420938491821, "rewards_train/margins_1": 1.2752658128738403, "rewards_train/margins_2": 1.3084523677825928, "step": 521 }, { "epoch": 1.56, "learning_rate": 6.38836320002468e-07, "loss": 0.3927, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -123.26437377929688, "logps_train/policy_1_l": -169.20721435546875, "logps_train/policy_1_w": -96.18144226074219, "logps_train/policy_2_2": -80.51634216308594, "logps_train/policy_2_w": -139.7741241455078, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.5157499313354492, "rewards_train/1-l": -2.1437692642211914, "rewards_train/1-w": 2.4466991424560547, "rewards_train/2-2": 2.8327410221099854, "rewards_train/2-w": 1.0389950275421143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.590468406677246, "rewards_train/margins_1": 0.9309492111206055, "rewards_train/margins_2": 1.793745994567871, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -195.14295959472656, "logps_train/policy_1_l": -223.3866729736328, "logps_train/policy_1_w": -93.05888366699219, "logps_train/policy_2_2": -146.80426025390625, "logps_train/policy_2_w": -127.67142486572266, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9888291954994202, "rewards_train/1-l": -1.8384705781936646, "rewards_train/1-w": 2.9847373962402344, "rewards_train/2-2": 2.7719180583953857, "rewards_train/2-w": 1.7039518356323242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.823207974433899, "rewards_train/margins_1": 1.9959082007408142, "rewards_train/margins_2": 1.0679662227630615, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -238.798095703125, "logps_train/policy_1_l": -158.26455688476562, "logps_train/policy_1_w": -165.46896362304688, "logps_train/policy_2_2": -194.1401824951172, "logps_train/policy_2_w": -195.96295166015625, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.279172658920288, "rewards_train/1-l": -1.2380279302597046, "rewards_train/1-w": 3.3348917961120605, "rewards_train/2-2": 3.588228702545166, "rewards_train/2-w": 2.1333913803100586, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.572919726371765, "rewards_train/margins_1": 2.0557191371917725, "rewards_train/margins_2": 1.4548373222351074, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -164.23117065429688, "logps_train/policy_1_l": -158.31924438476562, "logps_train/policy_1_w": -65.68924713134766, "logps_train/policy_2_2": -121.64997863769531, "logps_train/policy_2_w": -107.89132690429688, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -89.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 0.9721964597702026, "rewards_train/1-l": -2.1998939514160156, "rewards_train/1-w": 2.3726766109466553, "rewards_train/2-2": 2.8310956954956055, "rewards_train/2-w": 1.0573513507843018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.572570562362671, "rewards_train/margins_1": 1.4004801511764526, "rewards_train/margins_2": 1.7737443447113037, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -186.15451049804688, "logps_train/policy_1_l": -138.6055908203125, "logps_train/policy_1_w": -108.08148193359375, "logps_train/policy_2_2": -145.1661834716797, "logps_train/policy_2_w": -144.99935913085938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.0032973289489746, "rewards_train/1-l": -1.3851678371429443, "rewards_train/1-w": 3.2695863246917725, "rewards_train/2-2": 3.5833821296691895, "rewards_train/2-w": 2.175065517425537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.654754161834717, "rewards_train/margins_1": 1.2662889957427979, "rewards_train/margins_2": 1.4083166122436523, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -112.69532775878906, "logps_train/policy_1_l": -169.92138671875, "logps_train/policy_1_w": -88.75780487060547, "logps_train/policy_2_2": -91.02790069580078, "logps_train/policy_2_w": -114.37683868408203, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.2859350442886353, "rewards_train/1-l": -3.2441911697387695, "rewards_train/1-w": 2.7753915786743164, "rewards_train/2-2": 2.3212332725524902, "rewards_train/2-w": 1.4435663223266602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.019582748413086, "rewards_train/margins_1": 1.4894565343856812, "rewards_train/margins_2": 0.8776669502258301, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -149.80050659179688, "logps_train/policy_1_l": -178.35191345214844, "logps_train/policy_1_w": -79.08473205566406, "logps_train/policy_2_2": -99.40682983398438, "logps_train/policy_2_w": -120.21525573730469, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.0168254375457764, "rewards_train/1-l": -2.4937849044799805, "rewards_train/1-w": 2.6227774620056152, "rewards_train/2-2": 2.6983795166015625, "rewards_train/2-w": 1.0769115686416626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.116562366485596, "rewards_train/margins_1": 1.6059520244598389, "rewards_train/margins_2": 1.6214679479599, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -178.45626831054688, "logps_train/policy_1_l": -210.10549926757812, "logps_train/policy_1_w": -153.9117431640625, "logps_train/policy_2_2": -134.8822021484375, "logps_train/policy_2_w": -216.60227966308594, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": 1.354764699935913, "rewards_train/1-l": -3.3272485733032227, "rewards_train/1-w": 3.7775766849517822, "rewards_train/2-2": 2.8453736305236816, "rewards_train/2-w": 1.0475846529006958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.104825258255005, "rewards_train/margins_1": 2.422811985015869, "rewards_train/margins_2": 1.7977889776229858, "step": 522 }, { "epoch": 1.57, "logps_train/policy_1_2": -88.050048828125, "logps_train/policy_1_l": -111.78973388671875, "logps_train/policy_1_w": -92.40402221679688, "logps_train/policy_2_2": -63.16065216064453, "logps_train/policy_2_w": -127.46214294433594, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.8606204986572266, "rewards_train/1-l": -2.08131742477417, "rewards_train/1-w": 2.998660087585449, "rewards_train/2-2": 2.7292470932006836, "rewards_train/2-w": 1.267848014831543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.079977512359619, "rewards_train/margins_1": 1.1380395889282227, "rewards_train/margins_2": 1.4613990783691406, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -164.47653198242188, "logps_train/policy_1_l": -150.06370544433594, "logps_train/policy_1_w": -105.829345703125, "logps_train/policy_2_2": -126.51919555664062, "logps_train/policy_2_w": -132.66659545898438, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 2.046097755432129, "rewards_train/1-l": -1.679807424545288, "rewards_train/1-w": 3.1873779296875, "rewards_train/2-2": 3.4699554443359375, "rewards_train/2-w": 1.9770910739898682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.867185354232788, "rewards_train/margins_1": 1.141280174255371, "rewards_train/margins_2": 1.4928643703460693, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -61.333709716796875, "logps_train/policy_1_l": -113.36233520507812, "logps_train/policy_1_w": -58.92719268798828, "logps_train/policy_2_2": -47.894309997558594, "logps_train/policy_2_w": -83.166015625, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.6803008317947388, "rewards_train/1-l": -1.9104522466659546, "rewards_train/1-w": 2.724468231201172, "rewards_train/2-2": 1.9649631977081299, "rewards_train/2-w": 1.4873054027557373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6349204778671265, "rewards_train/margins_1": 1.044167399406433, "rewards_train/margins_2": 0.4776577949523926, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -231.03985595703125, "logps_train/policy_1_l": -164.56675720214844, "logps_train/policy_1_w": -193.138427734375, "logps_train/policy_2_2": -176.57318115234375, "logps_train/policy_2_w": -237.81118774414062, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.6233580112457275, "rewards_train/1-l": -1.7393898963928223, "rewards_train/1-w": 3.845534324645996, "rewards_train/2-2": 4.187017917633057, "rewards_train/2-w": 1.6610686779022217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.584924221038818, "rewards_train/margins_1": 2.2221763134002686, "rewards_train/margins_2": 2.525949239730835, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -216.80641174316406, "logps_train/policy_1_l": -207.29151916503906, "logps_train/policy_1_w": -156.15573120117188, "logps_train/policy_2_2": -183.50621032714844, "logps_train/policy_2_w": -192.81173706054688, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.1177961826324463, "rewards_train/1-l": -2.4479026794433594, "rewards_train/1-w": 3.471926689147949, "rewards_train/2-2": 3.4478163719177246, "rewards_train/2-w": 2.078200578689575, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.919829368591309, "rewards_train/margins_1": 1.354130506515503, "rewards_train/margins_2": 1.3696157932281494, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -104.9888916015625, "logps_train/policy_1_l": -91.30723571777344, "logps_train/policy_1_w": -82.78021240234375, "logps_train/policy_2_2": -82.57838439941406, "logps_train/policy_2_w": -104.01988220214844, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -78.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.4284543991088867, "rewards_train/1-l": -1.3002548217773438, "rewards_train/1-w": 2.225884437561035, "rewards_train/2-2": 2.2456769943237305, "rewards_train/2-w": 1.231215000152588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.526139259338379, "rewards_train/margins_1": 0.7974300384521484, "rewards_train/margins_2": 1.0144619941711426, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -135.37896728515625, "logps_train/policy_1_l": -105.01704406738281, "logps_train/policy_1_w": -94.78285217285156, "logps_train/policy_2_2": -107.77334594726562, "logps_train/policy_2_w": -124.32050323486328, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.3308517932891846, "rewards_train/1-l": -1.195845127105713, "rewards_train/1-w": 2.849839448928833, "rewards_train/2-2": 2.3820407390594482, "rewards_train/2-w": 1.8913869857788086, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.045684576034546, "rewards_train/margins_1": 1.5189876556396484, "rewards_train/margins_2": 0.49065375328063965, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -101.52079772949219, "logps_train/policy_1_l": -109.45492553710938, "logps_train/policy_1_w": -71.66830444335938, "logps_train/policy_2_2": -69.44147491455078, "logps_train/policy_2_w": -101.38274383544922, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 1.041670560836792, "rewards_train/1-l": -1.9892425537109375, "rewards_train/1-w": 2.2517242431640625, "rewards_train/2-2": 2.2956960201263428, "rewards_train/2-w": 1.0892648696899414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.240966796875, "rewards_train/margins_1": 1.2100536823272705, "rewards_train/margins_2": 1.2064311504364014, "step": 523 }, { "epoch": 1.57, "learning_rate": 6.224374839033928e-07, "loss": 0.4037, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -101.10298919677734, "logps_train/policy_1_l": -61.616756439208984, "logps_train/policy_1_w": -57.8233642578125, "logps_train/policy_2_2": -78.40673065185547, "logps_train/policy_2_w": -74.78952026367188, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -48.75, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.7022005319595337, "rewards_train/1-l": -1.28599214553833, "rewards_train/1-w": 1.7012577056884766, "rewards_train/2-2": 1.5503426790237427, "rewards_train/2-w": 1.0997586250305176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9872498512268066, "rewards_train/margins_1": 0.9990571737289429, "rewards_train/margins_2": 0.4505840539932251, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -82.28153228759766, "logps_train/policy_1_l": -71.03594207763672, "logps_train/policy_1_w": -75.63777923583984, "logps_train/policy_2_2": -67.1341552734375, "logps_train/policy_2_w": -95.13436889648438, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -60.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.9827845096588135, "rewards_train/1-l": -1.1121879816055298, "rewards_train/1-w": 2.5455970764160156, "rewards_train/2-2": 2.493225574493408, "rewards_train/2-w": 1.7553131580352783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6577850580215454, "rewards_train/margins_1": 0.5628125667572021, "rewards_train/margins_2": 0.7379124164581299, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -120.61965942382812, "logps_train/policy_1_l": -134.9196014404297, "logps_train/policy_1_w": -103.65718078613281, "logps_train/policy_2_2": -90.89497375488281, "logps_train/policy_2_w": -131.0281524658203, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.7614719867706299, "rewards_train/1-l": -2.5264806747436523, "rewards_train/1-w": 2.7499077320098877, "rewards_train/2-2": 2.830814838409424, "rewards_train/2-w": 1.4128105640411377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.27638840675354, "rewards_train/margins_1": 0.9884357452392578, "rewards_train/margins_2": 1.4180042743682861, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -196.38560485839844, "logps_train/policy_1_l": -169.51763916015625, "logps_train/policy_1_w": -131.03244018554688, "logps_train/policy_2_2": -148.81089782714844, "logps_train/policy_2_w": -171.32960510253906, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.4794089794158936, "rewards_train/1-l": -2.2928781509399414, "rewards_train/1-w": 2.927224636077881, "rewards_train/2-2": 3.3331680297851562, "rewards_train/2-w": 1.124852180480957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.220102787017822, "rewards_train/margins_1": 1.4478156566619873, "rewards_train/margins_2": 2.208315849304199, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -115.04340362548828, "logps_train/policy_1_l": -180.8709716796875, "logps_train/policy_1_w": -112.7540054321289, "logps_train/policy_2_2": -103.19204711914062, "logps_train/policy_2_w": -137.8389892578125, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.1155807971954346, "rewards_train/1-l": -2.5740108489990234, "rewards_train/1-w": 3.3355369567871094, "rewards_train/2-2": 2.464388847351074, "rewards_train/2-w": 2.2129769325256348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.909547805786133, "rewards_train/margins_1": 1.2199561595916748, "rewards_train/margins_2": 0.25141191482543945, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -134.50994873046875, "logps_train/policy_1_l": -183.3325958251953, "logps_train/policy_1_w": -109.90065002441406, "logps_train/policy_2_2": -107.82415008544922, "logps_train/policy_2_w": -142.13027954101562, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.4536924362182617, "rewards_train/1-l": -2.3574790954589844, "rewards_train/1-w": 2.410717010498047, "rewards_train/2-2": 2.4753971099853516, "rewards_train/2-w": 1.3525972366333008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.768196105957031, "rewards_train/margins_1": 0.9570245742797852, "rewards_train/margins_2": 1.1227998733520508, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -148.48989868164062, "logps_train/policy_1_l": -173.5922088623047, "logps_train/policy_1_w": -110.42257690429688, "logps_train/policy_2_2": -106.43501281738281, "logps_train/policy_2_w": -154.8590087890625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.1916362047195435, "rewards_train/1-l": -3.1896896362304688, "rewards_train/1-w": 3.5389931201934814, "rewards_train/2-2": 2.684624433517456, "rewards_train/2-w": 1.6812880039215088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.72868275642395, "rewards_train/margins_1": 2.347356915473938, "rewards_train/margins_2": 1.0033364295959473, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -208.2159881591797, "logps_train/policy_1_l": -160.41534423828125, "logps_train/policy_1_w": -146.67335510253906, "logps_train/policy_2_2": -169.82821655273438, "logps_train/policy_2_w": -195.58871459960938, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.6752760410308838, "rewards_train/1-l": -1.6526679992675781, "rewards_train/1-w": 3.448289632797241, "rewards_train/2-2": 3.0859291553497314, "rewards_train/2-w": 1.478628158569336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.100957632064819, "rewards_train/margins_1": 1.7730135917663574, "rewards_train/margins_2": 1.6073009967803955, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -156.01065063476562, "logps_train/policy_1_l": -203.08248901367188, "logps_train/policy_1_w": -184.39382934570312, "logps_train/policy_2_2": -120.29805755615234, "logps_train/policy_2_w": -220.6984405517578, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.3590914011001587, "rewards_train/1-l": -2.4035606384277344, "rewards_train/1-w": 3.221737861633301, "rewards_train/2-2": 3.054959774017334, "rewards_train/2-w": 1.5569149255752563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.625298500061035, "rewards_train/margins_1": 1.862646460533142, "rewards_train/margins_2": 1.4980448484420776, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -136.8748016357422, "logps_train/policy_1_l": -143.74681091308594, "logps_train/policy_1_w": -108.3912124633789, "logps_train/policy_2_2": -106.16775512695312, "logps_train/policy_2_w": -150.32489013671875, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.212763786315918, "rewards_train/1-l": -2.8201396465301514, "rewards_train/1-w": 2.898378849029541, "rewards_train/2-2": 2.4413299560546875, "rewards_train/2-w": 1.425714135169983, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.718518495559692, "rewards_train/margins_1": 1.685615062713623, "rewards_train/margins_2": 1.0156158208847046, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -98.64117431640625, "logps_train/policy_1_l": -55.507328033447266, "logps_train/policy_1_w": -36.45564270019531, "logps_train/policy_2_2": -67.64468383789062, "logps_train/policy_2_w": -49.1635856628418, "logps_train/ref_1_2": -105.5, "logps_train/ref_1_l": -48.5, "logps_train/ref_1_w": -47.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 0.6686955094337463, "rewards_train/1-l": -0.6906738877296448, "rewards_train/1-w": 1.0838303565979004, "rewards_train/2-2": 1.8242037296295166, "rewards_train/2-w": 0.5588366389274597, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7745042443275452, "rewards_train/margins_1": 0.41513484716415405, "rewards_train/margins_2": 1.2653670907020569, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -187.74354553222656, "logps_train/policy_1_l": -223.5629119873047, "logps_train/policy_1_w": -142.9033203125, "logps_train/policy_2_2": -142.01658630371094, "logps_train/policy_2_w": -195.85519409179688, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 1.5346299409866333, "rewards_train/1-l": -3.2219159603118896, "rewards_train/1-w": 4.275878429412842, "rewards_train/2-2": 3.079200267791748, "rewards_train/2-w": 2.7238552570343018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.4977943897247314, "rewards_train/margins_1": 2.7412484884262085, "rewards_train/margins_2": 0.3553450107574463, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -153.97607421875, "logps_train/policy_1_l": -149.45391845703125, "logps_train/policy_1_w": -118.77017211914062, "logps_train/policy_2_2": -115.3939437866211, "logps_train/policy_2_w": -156.86517333984375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.7406736612319946, "rewards_train/1-l": -1.5397284030914307, "rewards_train/1-w": 2.08880352973938, "rewards_train/2-2": 3.162168025970459, "rewards_train/2-w": 1.2902405261993408, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6285319328308105, "rewards_train/margins_1": 0.34812986850738525, "rewards_train/margins_2": 1.8719274997711182, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -172.45970153808594, "logps_train/policy_1_l": -94.11679077148438, "logps_train/policy_1_w": -58.99360656738281, "logps_train/policy_2_2": -138.48033142089844, "logps_train/policy_2_w": -83.6771011352539, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.400904655456543, "rewards_train/1-l": -1.6292568445205688, "rewards_train/1-w": 2.8314993381500244, "rewards_train/2-2": 2.787904739379883, "rewards_train/2-w": 1.999477505683899, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.460756182670593, "rewards_train/margins_1": 1.4305946826934814, "rewards_train/margins_2": 0.7884272336959839, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -200.7920379638672, "logps_train/policy_1_l": -285.49505615234375, "logps_train/policy_1_w": -147.53976440429688, "logps_train/policy_2_2": -165.51663208007812, "logps_train/policy_2_w": -185.72640991210938, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -256.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.3645458221435547, "rewards_train/1-l": -2.862006664276123, "rewards_train/1-w": 3.5710229873657227, "rewards_train/2-2": 3.742086410522461, "rewards_train/2-w": 2.189859390258789, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.433029651641846, "rewards_train/margins_1": 1.206477165222168, "rewards_train/margins_2": 1.5522270202636719, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -139.78797912597656, "logps_train/policy_1_l": -153.83570861816406, "logps_train/policy_1_w": -87.12364196777344, "logps_train/policy_2_2": -101.49618530273438, "logps_train/policy_2_w": -111.12169647216797, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.3626086711883545, "rewards_train/1-l": -1.7933363914489746, "rewards_train/1-w": 3.1141979694366455, "rewards_train/2-2": 2.831632137298584, "rewards_train/2-w": 1.9597053527832031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.90753436088562, "rewards_train/margins_1": 1.751589298248291, "rewards_train/margins_2": 0.8719267845153809, "step": 525 }, { "epoch": 1.57, "learning_rate": 6.062218945959497e-07, "loss": 0.4688, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -105.47122955322266, "logps_train/policy_1_l": -54.98765563964844, "logps_train/policy_1_w": -49.31857681274414, "logps_train/policy_2_2": -74.76731872558594, "logps_train/policy_2_w": -77.9916763305664, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -44.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -92.5, "rewards_train/1-2": 1.3911583423614502, "rewards_train/1-l": -1.1014995574951172, "rewards_train/1-w": 2.2329862117767334, "rewards_train/2-2": 2.5201430320739746, "rewards_train/2-w": 1.4688010215759277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3344857692718506, "rewards_train/margins_1": 0.8418278694152832, "rewards_train/margins_2": 1.0513420104980469, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -149.86549377441406, "logps_train/policy_1_l": -231.83724975585938, "logps_train/policy_1_w": -141.41937255859375, "logps_train/policy_2_2": -112.391357421875, "logps_train/policy_2_w": -187.49822998046875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.8634498119354248, "rewards_train/1-l": -3.408723831176758, "rewards_train/1-w": 3.4502511024475098, "rewards_train/2-2": 2.761645793914795, "rewards_train/2-w": 1.7345530986785889, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.858974933624268, "rewards_train/margins_1": 1.586801290512085, "rewards_train/margins_2": 1.027092695236206, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -243.5502471923828, "logps_train/policy_1_l": -226.48219299316406, "logps_train/policy_1_w": -100.53788757324219, "logps_train/policy_2_2": -207.2378387451172, "logps_train/policy_2_w": -125.83553314208984, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.331693649291992, "rewards_train/1-l": -2.706617832183838, "rewards_train/1-w": 3.3123250007629395, "rewards_train/2-2": 3.530122995376587, "rewards_train/2-w": 2.297696113586426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.018942832946777, "rewards_train/margins_1": 0.9806313514709473, "rewards_train/margins_2": 1.2324268817901611, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -245.07199096679688, "logps_train/policy_1_l": -262.9751892089844, "logps_train/policy_1_w": -105.97674560546875, "logps_train/policy_2_2": -205.91024780273438, "logps_train/policy_2_w": -128.39016723632812, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 2.333425521850586, "rewards_train/1-l": -2.0947842597961426, "rewards_train/1-w": 3.297637939453125, "rewards_train/2-2": 3.7839760780334473, "rewards_train/2-w": 2.6406705379486084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.392422199249268, "rewards_train/margins_1": 0.9642124176025391, "rewards_train/margins_2": 1.1433055400848389, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -121.75390625, "logps_train/policy_1_l": -121.03033447265625, "logps_train/policy_1_w": -96.37847900390625, "logps_train/policy_2_2": -100.09307861328125, "logps_train/policy_2_w": -112.60076904296875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 2.0777342319488525, "rewards_train/1-l": -1.8393621444702148, "rewards_train/1-w": 2.9648866653442383, "rewards_train/2-2": 2.900848388671875, "rewards_train/2-w": 2.4008612632751465, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.804248809814453, "rewards_train/margins_1": 0.8871524333953857, "rewards_train/margins_2": 0.4999871253967285, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -115.14842987060547, "logps_train/policy_1_l": -171.12020874023438, "logps_train/policy_1_w": -107.16761016845703, "logps_train/policy_2_2": -90.28134155273438, "logps_train/policy_2_w": -138.70787048339844, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.6312508583068848, "rewards_train/1-l": -1.7461273670196533, "rewards_train/1-w": 2.730113983154297, "rewards_train/2-2": 2.5710842609405518, "rewards_train/2-w": 1.3503061532974243, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.47624135017395, "rewards_train/margins_1": 1.098863124847412, "rewards_train/margins_2": 1.2207781076431274, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -124.52461242675781, "logps_train/policy_1_l": -105.81344604492188, "logps_train/policy_1_w": -80.05567169189453, "logps_train/policy_2_2": -97.63312530517578, "logps_train/policy_2_w": -103.21217346191406, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.6506640911102295, "rewards_train/1-l": -1.9930627346038818, "rewards_train/1-w": 2.739745616912842, "rewards_train/2-2": 2.6648123264312744, "rewards_train/2-w": 1.7975330352783203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.732808351516724, "rewards_train/margins_1": 1.0890815258026123, "rewards_train/margins_2": 0.8672792911529541, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -230.1735382080078, "logps_train/policy_1_l": -219.15469360351562, "logps_train/policy_1_w": -181.60769653320312, "logps_train/policy_2_2": -182.27621459960938, "logps_train/policy_2_w": -256.45379638671875, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 1.6584274768829346, "rewards_train/1-l": -2.4271888732910156, "rewards_train/1-w": 5.462667465209961, "rewards_train/2-2": 3.5762858390808105, "rewards_train/2-w": 2.0811824798583984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.889856338500977, "rewards_train/margins_1": 3.8042399883270264, "rewards_train/margins_2": 1.495103359222412, "step": 526 }, { "epoch": 1.58, "logps_train/policy_1_2": -227.07830810546875, "logps_train/policy_1_l": -239.04798889160156, "logps_train/policy_1_w": -215.951416015625, "logps_train/policy_2_2": -189.45245361328125, "logps_train/policy_2_w": -263.71954345703125, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -256.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.4515442848205566, "rewards_train/1-l": -2.517299175262451, "rewards_train/1-w": 4.0861101150512695, "rewards_train/2-2": 3.6297550201416016, "rewards_train/2-w": 1.9905455112457275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.603409290313721, "rewards_train/margins_1": 1.634565830230713, "rewards_train/margins_2": 1.639209508895874, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -146.45181274414062, "logps_train/policy_1_l": -147.93438720703125, "logps_train/policy_1_w": -84.15713500976562, "logps_train/policy_2_2": -122.3939208984375, "logps_train/policy_2_w": -111.85260009765625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.5563828945159912, "rewards_train/1-l": -2.3762521743774414, "rewards_train/1-w": 2.493661880493164, "rewards_train/2-2": 2.448108434677124, "rewards_train/2-w": 1.5194274187088013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.8699140548706055, "rewards_train/margins_1": 0.9372789859771729, "rewards_train/margins_2": 0.9286810159683228, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -220.91195678710938, "logps_train/policy_1_l": -158.51229858398438, "logps_train/policy_1_w": -166.62738037109375, "logps_train/policy_2_2": -176.73553466796875, "logps_train/policy_2_w": -221.7913818359375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.4244301319122314, "rewards_train/1-l": -1.6199796199798584, "rewards_train/1-w": 3.2524967193603516, "rewards_train/2-2": 3.1920721530914307, "rewards_train/2-w": 1.2810168266296387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.87247633934021, "rewards_train/margins_1": 1.8280665874481201, "rewards_train/margins_2": 1.911055326461792, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -198.8353729248047, "logps_train/policy_1_l": -250.3773193359375, "logps_train/policy_1_w": -126.42010498046875, "logps_train/policy_2_2": -158.2110595703125, "logps_train/policy_2_w": -167.63970947265625, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.8383374214172363, "rewards_train/1-l": -2.693981647491455, "rewards_train/1-w": 3.1704893112182617, "rewards_train/2-2": 3.886707305908203, "rewards_train/2-w": 2.001654624938965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.864470958709717, "rewards_train/margins_1": 0.3321518898010254, "rewards_train/margins_2": 1.8850526809692383, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -217.67807006835938, "logps_train/policy_1_l": -262.8319396972656, "logps_train/policy_1_w": -148.0115966796875, "logps_train/policy_2_2": -155.61448669433594, "logps_train/policy_2_w": -198.6593475341797, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.5982087850570679, "rewards_train/1-l": -3.365225076675415, "rewards_train/1-w": 3.53946590423584, "rewards_train/2-2": 3.8768324851989746, "rewards_train/2-w": 1.4559392929077148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.904690980911255, "rewards_train/margins_1": 1.941257119178772, "rewards_train/margins_2": 2.4208931922912598, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -138.96060180664062, "logps_train/policy_1_l": -169.27374267578125, "logps_train/policy_1_w": -75.68427276611328, "logps_train/policy_2_2": -115.81907653808594, "logps_train/policy_2_w": -96.816650390625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.798958420753479, "rewards_train/1-l": -2.3922173976898193, "rewards_train/1-w": 3.1612606048583984, "rewards_train/2-2": 2.8886494636535645, "rewards_train/2-w": 2.321460485458374, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.553478002548218, "rewards_train/margins_1": 1.3623021841049194, "rewards_train/margins_2": 0.5671889781951904, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -102.73426818847656, "logps_train/policy_1_l": -97.96013641357422, "logps_train/policy_1_w": -103.07635498046875, "logps_train/policy_2_2": -86.72711181640625, "logps_train/policy_2_w": -142.43865966796875, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.6203231811523438, "rewards_train/1-l": -1.4776544570922852, "rewards_train/1-w": 2.5072076320648193, "rewards_train/2-2": 2.11491060256958, "rewards_train/2-w": 1.2030094861984253, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9848620891571045, "rewards_train/margins_1": 0.8868844509124756, "rewards_train/margins_2": 0.9119011163711548, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -176.68161010742188, "logps_train/policy_1_l": -184.74404907226562, "logps_train/policy_1_w": -114.1108627319336, "logps_train/policy_2_2": -140.91629028320312, "logps_train/policy_2_w": -151.88851928710938, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.059964656829834, "rewards_train/1-l": -2.3056540489196777, "rewards_train/1-w": 3.4061012268066406, "rewards_train/2-2": 3.813840627670288, "rewards_train/2-w": 2.0611484050750732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.711755275726318, "rewards_train/margins_1": 1.3461365699768066, "rewards_train/margins_2": 1.7526922225952148, "step": 527 }, { "epoch": 1.58, "learning_rate": 5.901911346930688e-07, "loss": 0.3494, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -205.33926391601562, "logps_train/policy_1_l": -172.0908203125, "logps_train/policy_1_w": -149.827392578125, "logps_train/policy_2_2": -157.8582763671875, "logps_train/policy_2_w": -194.70465087890625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.8379476070404053, "rewards_train/1-l": -1.987793207168579, "rewards_train/1-w": 3.1948976516723633, "rewards_train/2-2": 3.9047985076904297, "rewards_train/2-w": 1.3959418535232544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.182690858840942, "rewards_train/margins_1": 1.356950044631958, "rewards_train/margins_2": 2.5088566541671753, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -102.42717742919922, "logps_train/policy_1_l": -102.94874572753906, "logps_train/policy_1_w": -38.608821868896484, "logps_train/policy_2_2": -76.54254150390625, "logps_train/policy_2_w": -52.658180236816406, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -68.5, "rewards_train/1-2": 1.36860990524292, "rewards_train/1-l": -2.410597085952759, "rewards_train/1-w": 2.0599186420440674, "rewards_train/2-2": 2.285980463027954, "rewards_train/2-w": 1.5916038751602173, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.470515727996826, "rewards_train/margins_1": 0.6913087368011475, "rewards_train/margins_2": 0.6943765878677368, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -94.01300811767578, "logps_train/policy_1_l": -189.20489501953125, "logps_train/policy_1_w": -103.52507019042969, "logps_train/policy_2_2": -74.63055419921875, "logps_train/policy_2_w": -127.5842056274414, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.8207696676254272, "rewards_train/1-l": -2.737870693206787, "rewards_train/1-w": 2.8928050994873047, "rewards_train/2-2": 2.691826820373535, "rewards_train/2-w": 1.743532657623291, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.630675792694092, "rewards_train/margins_1": 1.0720354318618774, "rewards_train/margins_2": 0.9482941627502441, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -128.3790283203125, "logps_train/policy_1_l": -175.150146484375, "logps_train/policy_1_w": -137.49282836914062, "logps_train/policy_2_2": -90.11045837402344, "logps_train/policy_2_w": -177.31532287597656, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.6074104309082031, "rewards_train/1-l": -2.0384533405303955, "rewards_train/1-w": 3.2819664478302, "rewards_train/2-2": 2.6022353172302246, "rewards_train/2-w": 1.6872175931930542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.320419788360596, "rewards_train/margins_1": 1.674556016921997, "rewards_train/margins_2": 0.9150177240371704, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -223.0283203125, "logps_train/policy_1_l": -216.9972686767578, "logps_train/policy_1_w": -167.73654174804688, "logps_train/policy_2_2": -182.632080078125, "logps_train/policy_2_w": -208.60623168945312, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.5706043243408203, "rewards_train/1-l": -1.6731648445129395, "rewards_train/1-w": 3.7325961589813232, "rewards_train/2-2": 3.874293804168701, "rewards_train/2-w": 2.561250686645508, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.405761003494263, "rewards_train/margins_1": 1.161991834640503, "rewards_train/margins_2": 1.3130431175231934, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -76.4769287109375, "logps_train/policy_1_l": -88.94733428955078, "logps_train/policy_1_w": -70.31371307373047, "logps_train/policy_2_2": -61.365623474121094, "logps_train/policy_2_w": -85.95149230957031, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 1.6812126636505127, "rewards_train/1-l": -1.6541088819503784, "rewards_train/1-w": 2.25534725189209, "rewards_train/2-2": 2.3142192363739014, "rewards_train/2-w": 1.3196942806243896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9094561338424683, "rewards_train/margins_1": 0.5741345882415771, "rewards_train/margins_2": 0.9945249557495117, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -83.6554946899414, "logps_train/policy_1_l": -93.1265869140625, "logps_train/policy_1_w": -72.52701568603516, "logps_train/policy_2_2": -63.047950744628906, "logps_train/policy_2_w": -90.53933715820312, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 1.1969501972198486, "rewards_train/1-l": -1.770275354385376, "rewards_train/1-w": 2.2736659049987793, "rewards_train/2-2": 2.1772360801696777, "rewards_train/2-w": 1.3398165702819824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.043941259384155, "rewards_train/margins_1": 1.0767157077789307, "rewards_train/margins_2": 0.8374195098876953, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -87.86154174804688, "logps_train/policy_1_l": -58.574867248535156, "logps_train/policy_1_w": -95.45134735107422, "logps_train/policy_2_2": -59.65184020996094, "logps_train/policy_2_w": -145.70526123046875, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -46.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.05759596824646, "rewards_train/1-l": -1.208707571029663, "rewards_train/1-w": 3.482990264892578, "rewards_train/2-2": 2.1797869205474854, "rewards_train/2-w": 1.4716615676879883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.691697835922241, "rewards_train/margins_1": 2.425394296646118, "rewards_train/margins_2": 0.7081253528594971, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -153.355712890625, "logps_train/policy_1_l": -163.66903686523438, "logps_train/policy_1_w": -181.96307373046875, "logps_train/policy_2_2": -113.0541763305664, "logps_train/policy_2_w": -233.87832641601562, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 1.499584436416626, "rewards_train/1-l": -1.3522546291351318, "rewards_train/1-w": 4.203693389892578, "rewards_train/2-2": 3.1117701530456543, "rewards_train/2-w": 1.762165904045105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.55594801902771, "rewards_train/margins_1": 2.704108953475952, "rewards_train/margins_2": 1.3496042490005493, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -64.90777587890625, "logps_train/policy_1_l": -65.3893051147461, "logps_train/policy_1_w": -41.23680114746094, "logps_train/policy_2_2": -52.77185821533203, "logps_train/policy_2_w": -56.76390075683594, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -59.5, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.9928156137466431, "rewards_train/1-l": -1.296791911125183, "rewards_train/1-w": 1.8145525455474854, "rewards_train/2-2": 1.4449818134307861, "rewards_train/2-w": 1.2970476150512695, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1113444566726685, "rewards_train/margins_1": 0.8217369318008423, "rewards_train/margins_2": 0.1479341983795166, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -70.11521911621094, "logps_train/policy_1_l": -106.01002502441406, "logps_train/policy_1_w": -60.360721588134766, "logps_train/policy_2_2": -50.28247833251953, "logps_train/policy_2_w": -89.33402252197266, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": 1.0033214092254639, "rewards_train/1-l": -1.761549472808838, "rewards_train/1-w": 1.9881465435028076, "rewards_train/2-2": 1.9795646667480469, "rewards_train/2-w": 0.6111292243003845, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7496960163116455, "rewards_train/margins_1": 0.9848251342773438, "rewards_train/margins_2": 1.3684354424476624, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -143.03610229492188, "logps_train/policy_1_l": -114.84859466552734, "logps_train/policy_1_w": -60.16987609863281, "logps_train/policy_2_2": -99.87625122070312, "logps_train/policy_2_w": -89.84028625488281, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 0.8284206390380859, "rewards_train/1-l": -1.337984561920166, "rewards_train/1-w": 2.3373091220855713, "rewards_train/2-2": 2.6670620441436768, "rewards_train/2-w": 1.0831594467163086, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6752936840057373, "rewards_train/margins_1": 1.5088884830474854, "rewards_train/margins_2": 1.5839025974273682, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -133.78660583496094, "logps_train/policy_1_l": -126.45504760742188, "logps_train/policy_1_w": -123.13289642333984, "logps_train/policy_2_2": -102.12751007080078, "logps_train/policy_2_w": -157.03329467773438, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.4400889873504639, "rewards_train/1-l": -1.646163821220398, "rewards_train/1-w": 3.215616226196289, "rewards_train/2-2": 2.8028745651245117, "rewards_train/2-w": 1.7122962474822998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.861780047416687, "rewards_train/margins_1": 1.7755272388458252, "rewards_train/margins_2": 1.090578317642212, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -153.3389434814453, "logps_train/policy_1_l": -148.03460693359375, "logps_train/policy_1_w": -116.13389587402344, "logps_train/policy_2_2": -114.92028045654297, "logps_train/policy_2_w": -162.61993408203125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.1356374025344849, "rewards_train/1-l": -2.3706486225128174, "rewards_train/1-w": 3.034266471862793, "rewards_train/2-2": 2.8431286811828613, "rewards_train/2-w": 0.9971866607666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.40491509437561, "rewards_train/margins_1": 1.898629069328308, "rewards_train/margins_2": 1.8459420204162598, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -236.15350341796875, "logps_train/policy_1_l": -154.32460021972656, "logps_train/policy_1_w": -93.84355163574219, "logps_train/policy_2_2": -182.3591766357422, "logps_train/policy_2_w": -118.44738006591797, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.887775182723999, "rewards_train/1-l": -2.0170304775238037, "rewards_train/1-w": 2.462129592895508, "rewards_train/2-2": 3.6109566688537598, "rewards_train/2-w": 1.5732309818267822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4791600704193115, "rewards_train/margins_1": 1.5743544101715088, "rewards_train/margins_2": 2.0377256870269775, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -183.14158630371094, "logps_train/policy_1_l": -227.76089477539062, "logps_train/policy_1_w": -194.73345947265625, "logps_train/policy_2_2": -141.61422729492188, "logps_train/policy_2_w": -271.46026611328125, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -294.0, "rewards_train/1-2": 2.7905282974243164, "rewards_train/1-l": -1.9777495861053467, "rewards_train/1-w": 4.471965789794922, "rewards_train/2-2": 4.093264579772949, "rewards_train/2-w": 2.2727246284484863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.4497153759002686, "rewards_train/margins_1": 1.6814374923706055, "rewards_train/margins_2": 1.820539951324463, "step": 529 }, { "epoch": 1.59, "learning_rate": 5.743467687686563e-07, "loss": 0.4926, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -125.17572784423828, "logps_train/policy_1_l": -85.30662536621094, "logps_train/policy_1_w": -73.28250122070312, "logps_train/policy_2_2": -92.54753112792969, "logps_train/policy_2_w": -117.36186981201172, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.8035205006599426, "rewards_train/1-l": -1.030858039855957, "rewards_train/1-w": 2.8498752117156982, "rewards_train/2-2": 2.4479806423187256, "rewards_train/2-w": 0.9669380784034729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8807332515716553, "rewards_train/margins_1": 2.0463547110557556, "rewards_train/margins_2": 1.4810425639152527, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -107.83726501464844, "logps_train/policy_1_l": -104.50119018554688, "logps_train/policy_1_w": -92.74815368652344, "logps_train/policy_2_2": -92.60198974609375, "logps_train/policy_2_w": -116.1421127319336, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.8881480693817139, "rewards_train/1-l": -2.301682233810425, "rewards_train/1-w": 3.063857078552246, "rewards_train/2-2": 2.3745670318603516, "rewards_train/2-w": 1.976413607597351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.365539312362671, "rewards_train/margins_1": 1.1757090091705322, "rewards_train/margins_2": 0.3981534242630005, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -65.39190673828125, "logps_train/policy_1_l": -82.54856872558594, "logps_train/policy_1_w": -34.9837532043457, "logps_train/policy_2_2": -49.34841537475586, "logps_train/policy_2_w": -56.38899230957031, "logps_train/ref_1_2": -73.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -56.0, "logps_train/ref_2_2": -63.75, "logps_train/ref_2_w": -68.5, "rewards_train/1-2": 0.7763372659683228, "rewards_train/1-l": -1.4516339302062988, "rewards_train/1-w": 2.0881481170654297, "rewards_train/2-2": 1.4338111877441406, "rewards_train/2-w": 1.2087572813034058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.5397820472717285, "rewards_train/margins_1": 1.311810851097107, "rewards_train/margins_2": 0.22505390644073486, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -160.3916015625, "logps_train/policy_1_l": -253.9143829345703, "logps_train/policy_1_w": -116.40762329101562, "logps_train/policy_2_2": -125.10856628417969, "logps_train/policy_2_w": -167.10244750976562, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.065528392791748, "rewards_train/1-l": -2.4689767360687256, "rewards_train/1-w": 2.9451749324798584, "rewards_train/2-2": 2.591486692428589, "rewards_train/2-w": 0.9600683450698853, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.414151668548584, "rewards_train/margins_1": 1.8796465396881104, "rewards_train/margins_2": 1.6314183473587036, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -177.87144470214844, "logps_train/policy_1_l": -188.54220581054688, "logps_train/policy_1_w": -138.6232452392578, "logps_train/policy_2_2": -145.23861694335938, "logps_train/policy_2_w": -182.41883850097656, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.6198874711990356, "rewards_train/1-l": -2.291818380355835, "rewards_train/1-w": 2.081815719604492, "rewards_train/2-2": 3.205141067504883, "rewards_train/2-w": 0.47540175914764404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.373634099960327, "rewards_train/margins_1": 0.46192824840545654, "rewards_train/margins_2": 2.7297393083572388, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -243.4381866455078, "logps_train/policy_1_l": -193.68002319335938, "logps_train/policy_1_w": -123.64584350585938, "logps_train/policy_2_2": -195.67210388183594, "logps_train/policy_2_w": -169.354736328125, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.290555953979492, "rewards_train/1-l": -1.9316973686218262, "rewards_train/1-w": 4.001040458679199, "rewards_train/2-2": 4.239039421081543, "rewards_train/2-w": 2.423900842666626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.932737827301025, "rewards_train/margins_1": 1.710484504699707, "rewards_train/margins_2": 1.815138578414917, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -120.07925415039062, "logps_train/policy_1_l": -129.20574951171875, "logps_train/policy_1_w": -90.7168960571289, "logps_train/policy_2_2": -92.40728759765625, "logps_train/policy_2_w": -115.50894927978516, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.431917667388916, "rewards_train/1-l": -1.6356616020202637, "rewards_train/1-w": 2.3893961906433105, "rewards_train/2-2": 2.3369088172912598, "rewards_train/2-w": 1.1705894470214844, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.025057792663574, "rewards_train/margins_1": 0.9574785232543945, "rewards_train/margins_2": 1.1663193702697754, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -111.84077453613281, "logps_train/policy_1_l": -117.49391174316406, "logps_train/policy_1_w": -127.55784606933594, "logps_train/policy_2_2": -86.83837890625, "logps_train/policy_2_w": -156.38638305664062, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.5635788440704346, "rewards_train/1-l": -0.8978279232978821, "rewards_train/1-w": 2.686305284500122, "rewards_train/2-2": 2.4446778297424316, "rewards_train/2-w": 1.2701505422592163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.584133207798004, "rewards_train/margins_1": 1.1227264404296875, "rewards_train/margins_2": 1.1745272874832153, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -178.8690643310547, "logps_train/policy_1_l": -181.30213928222656, "logps_train/policy_1_w": -196.97280883789062, "logps_train/policy_2_2": -141.83689880371094, "logps_train/policy_2_w": -249.04615783691406, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -237.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.417780876159668, "rewards_train/1-l": -1.5247446298599243, "rewards_train/1-w": 4.002718448638916, "rewards_train/2-2": 3.4272472858428955, "rewards_train/2-w": 2.0688223838806152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.52746307849884, "rewards_train/margins_1": 1.584937572479248, "rewards_train/margins_2": 1.3584249019622803, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -140.45669555664062, "logps_train/policy_1_l": -192.76986694335938, "logps_train/policy_1_w": -101.74777221679688, "logps_train/policy_2_2": -116.22985076904297, "logps_train/policy_2_w": -139.2283477783203, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.807456612586975, "rewards_train/1-l": -2.0748376846313477, "rewards_train/1-w": 3.162722110748291, "rewards_train/2-2": 2.748889446258545, "rewards_train/2-w": 1.5037274360656738, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.237559795379639, "rewards_train/margins_1": 1.355265498161316, "rewards_train/margins_2": 1.245162010192871, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -174.32168579101562, "logps_train/policy_1_l": -172.15333557128906, "logps_train/policy_1_w": -157.58584594726562, "logps_train/policy_2_2": -133.61685180664062, "logps_train/policy_2_w": -208.17144775390625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.9897058010101318, "rewards_train/1-l": -0.8800308108329773, "rewards_train/1-w": 3.476572036743164, "rewards_train/2-2": 3.7453458309173584, "rewards_train/2-w": 1.5891048908233643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.356602847576141, "rewards_train/margins_1": 1.4868662357330322, "rewards_train/margins_2": 2.156240940093994, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -210.98773193359375, "logps_train/policy_1_l": -193.41464233398438, "logps_train/policy_1_w": -137.634033203125, "logps_train/policy_2_2": -156.4019012451172, "logps_train/policy_2_w": -188.2184600830078, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.9309147596359253, "rewards_train/1-l": -2.5399012565612793, "rewards_train/1-w": 3.8459722995758057, "rewards_train/2-2": 3.7504348754882812, "rewards_train/2-w": 2.0687785148620605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.385873556137085, "rewards_train/margins_1": 1.9150575399398804, "rewards_train/margins_2": 1.6816563606262207, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -110.2706298828125, "logps_train/policy_1_l": -101.31163024902344, "logps_train/policy_1_w": -66.5025634765625, "logps_train/policy_2_2": -86.66072082519531, "logps_train/policy_2_w": -97.8230972290039, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.24552583694458, "rewards_train/1-l": -1.7003710269927979, "rewards_train/1-w": 2.6169309616088867, "rewards_train/2-2": 1.983250617980957, "rewards_train/2-w": 1.3130028247833252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.317301988601685, "rewards_train/margins_1": 1.3714051246643066, "rewards_train/margins_2": 0.6702477931976318, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -192.55194091796875, "logps_train/policy_1_l": -176.8064422607422, "logps_train/policy_1_w": -106.6701889038086, "logps_train/policy_2_2": -143.92352294921875, "logps_train/policy_2_w": -140.50157165527344, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.6260559558868408, "rewards_train/1-l": -2.541581392288208, "rewards_train/1-w": 3.5548558235168457, "rewards_train/2-2": 3.751398801803589, "rewards_train/2-w": 2.509218215942383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.096437215805054, "rewards_train/margins_1": 1.9287998676300049, "rewards_train/margins_2": 1.242180585861206, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -214.53225708007812, "logps_train/policy_1_l": -174.28091430664062, "logps_train/policy_1_w": -124.464599609375, "logps_train/policy_2_2": -174.40408325195312, "logps_train/policy_2_w": -157.12713623046875, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 2.2686495780944824, "rewards_train/1-l": -2.499967098236084, "rewards_train/1-w": 2.650024175643921, "rewards_train/2-2": 3.769746780395508, "rewards_train/2-w": 1.3790823221206665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.149991273880005, "rewards_train/margins_1": 0.3813745975494385, "rewards_train/margins_2": 2.3906644582748413, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -121.85322570800781, "logps_train/policy_1_l": -194.95892333984375, "logps_train/policy_1_w": -105.38327026367188, "logps_train/policy_2_2": -94.3969497680664, "logps_train/policy_2_w": -145.31997680664062, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.845927119255066, "rewards_train/1-l": -2.81654691696167, "rewards_train/1-w": 3.0147972106933594, "rewards_train/2-2": 2.9181175231933594, "rewards_train/2-w": 1.5867528915405273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.831344127655029, "rewards_train/margins_1": 1.1688700914382935, "rewards_train/margins_2": 1.331364631652832, "step": 531 }, { "epoch": 1.59, "learning_rate": 5.586903432048943e-07, "loss": 0.4224, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -147.53729248046875, "logps_train/policy_1_l": -187.305419921875, "logps_train/policy_1_w": -124.22859954833984, "logps_train/policy_2_2": -119.50468444824219, "logps_train/policy_2_w": -165.8643798828125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 2.1986148357391357, "rewards_train/1-l": -2.477466583251953, "rewards_train/1-w": 3.542374610900879, "rewards_train/2-2": 3.0467967987060547, "rewards_train/2-w": 1.8901240825653076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.019841194152832, "rewards_train/margins_1": 1.3437597751617432, "rewards_train/margins_2": 1.156672716140747, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -125.15325927734375, "logps_train/policy_1_l": -215.40162658691406, "logps_train/policy_1_w": -78.01534271240234, "logps_train/policy_2_2": -94.03105163574219, "logps_train/policy_2_w": -114.48481750488281, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 0.9545961618423462, "rewards_train/1-l": -2.2479746341705322, "rewards_train/1-w": 2.15315318107605, "rewards_train/2-2": 1.9218943119049072, "rewards_train/2-w": 1.0952680110931396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.401127815246582, "rewards_train/margins_1": 1.1985570192337036, "rewards_train/margins_2": 0.8266263008117676, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -189.39622497558594, "logps_train/policy_1_l": -205.83013916015625, "logps_train/policy_1_w": -97.36386108398438, "logps_train/policy_2_2": -142.63482666015625, "logps_train/policy_2_w": -141.1413116455078, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.5603779554367065, "rewards_train/1-l": -3.5314512252807617, "rewards_train/1-w": 2.9823641777038574, "rewards_train/2-2": 3.174018144607544, "rewards_train/2-w": 1.7874315977096558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.513815402984619, "rewards_train/margins_1": 1.4219862222671509, "rewards_train/margins_2": 1.3865865468978882, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -181.87435913085938, "logps_train/policy_1_l": -220.54156494140625, "logps_train/policy_1_w": -119.56732177734375, "logps_train/policy_2_2": -130.3978729248047, "logps_train/policy_2_w": -163.99969482421875, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.2313153743743896, "rewards_train/1-l": -2.0205624103546143, "rewards_train/1-w": 3.24326753616333, "rewards_train/2-2": 3.128962993621826, "rewards_train/2-w": 1.9062813520431519, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.263829946517944, "rewards_train/margins_1": 2.0119521617889404, "rewards_train/margins_2": 1.2226816415786743, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -167.18170166015625, "logps_train/policy_1_l": -201.66253662109375, "logps_train/policy_1_w": -94.4012680053711, "logps_train/policy_2_2": -126.12876892089844, "logps_train/policy_2_w": -124.28126525878906, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1052680015563965, "rewards_train/1-l": -2.5400824546813965, "rewards_train/1-w": 2.946592092514038, "rewards_train/2-2": 2.677748680114746, "rewards_train/2-w": 1.6957006454467773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.486674547195435, "rewards_train/margins_1": 1.8413240909576416, "rewards_train/margins_2": 0.9820480346679688, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -138.25228881835938, "logps_train/policy_1_l": -128.23948669433594, "logps_train/policy_1_w": -107.33679962158203, "logps_train/policy_2_2": -108.1614990234375, "logps_train/policy_2_w": -146.71133422851562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.398208737373352, "rewards_train/1-l": -1.7587144374847412, "rewards_train/1-w": 2.830479860305786, "rewards_train/2-2": 2.65670108795166, "rewards_train/2-w": 1.2112889289855957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.589194297790527, "rewards_train/margins_1": 1.432271122932434, "rewards_train/margins_2": 1.4454121589660645, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -170.65020751953125, "logps_train/policy_1_l": -249.8450927734375, "logps_train/policy_1_w": -94.55211639404297, "logps_train/policy_2_2": -133.4315185546875, "logps_train/policy_2_w": -132.5438232421875, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.7111014127731323, "rewards_train/1-l": -1.9058469533920288, "rewards_train/1-w": 3.4404919147491455, "rewards_train/2-2": 2.868541955947876, "rewards_train/2-w": 2.43624210357666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.346338868141174, "rewards_train/margins_1": 1.7293905019760132, "rewards_train/margins_2": 0.4322998523712158, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -167.61358642578125, "logps_train/policy_1_l": -151.12220764160156, "logps_train/policy_1_w": -122.30319213867188, "logps_train/policy_2_2": -126.58660125732422, "logps_train/policy_2_w": -165.6822967529297, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.6994316577911377, "rewards_train/1-l": -2.1279916763305664, "rewards_train/1-w": 3.5399932861328125, "rewards_train/2-2": 3.387457847595215, "rewards_train/2-w": 1.7938793897628784, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.667984962463379, "rewards_train/margins_1": 1.8405616283416748, "rewards_train/margins_2": 1.5935784578323364, "step": 532 }, { "epoch": 1.6, "logps_train/policy_1_2": -94.61738586425781, "logps_train/policy_1_l": -100.11709594726562, "logps_train/policy_1_w": -90.96309661865234, "logps_train/policy_2_2": -75.00899505615234, "logps_train/policy_2_w": -112.8266830444336, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.491776943206787, "rewards_train/1-l": -1.1284818649291992, "rewards_train/1-w": 1.9837682247161865, "rewards_train/2-2": 2.264725923538208, "rewards_train/2-w": 0.9384254217147827, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.1122500896453857, "rewards_train/margins_1": 0.4919912815093994, "rewards_train/margins_2": 1.3263005018234253, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -109.51666259765625, "logps_train/policy_1_l": -172.93927001953125, "logps_train/policy_1_w": -120.17919921875, "logps_train/policy_2_2": -89.46720123291016, "logps_train/policy_2_w": -155.83889770507812, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.4827091693878174, "rewards_train/1-l": -2.1351375579833984, "rewards_train/1-w": 2.798096179962158, "rewards_train/2-2": 1.9677332639694214, "rewards_train/2-w": 1.3009735345840454, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.933233737945557, "rewards_train/margins_1": 1.3153870105743408, "rewards_train/margins_2": 0.666759729385376, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -162.79977416992188, "logps_train/policy_1_l": -203.3458709716797, "logps_train/policy_1_w": -100.19586181640625, "logps_train/policy_2_2": -128.45953369140625, "logps_train/policy_2_w": -138.98330688476562, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.885648488998413, "rewards_train/1-l": -2.0619192123413086, "rewards_train/1-w": 2.960101842880249, "rewards_train/2-2": 3.4722111225128174, "rewards_train/2-w": 1.9547940492630005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.022021055221558, "rewards_train/margins_1": 1.074453353881836, "rewards_train/margins_2": 1.517417073249817, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -123.9356689453125, "logps_train/policy_1_l": -146.9901885986328, "logps_train/policy_1_w": -125.9434814453125, "logps_train/policy_2_2": -101.63206481933594, "logps_train/policy_2_w": -160.79983520507812, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.8193238973617554, "rewards_train/1-l": -1.7144975662231445, "rewards_train/1-w": 3.1744024753570557, "rewards_train/2-2": 2.621948719024658, "rewards_train/2-w": 2.0903286933898926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.8889000415802, "rewards_train/margins_1": 1.3550785779953003, "rewards_train/margins_2": 0.5316200256347656, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -164.13221740722656, "logps_train/policy_1_l": -110.71402740478516, "logps_train/policy_1_w": -108.34027862548828, "logps_train/policy_2_2": -130.07876586914062, "logps_train/policy_2_w": -148.29690551757812, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.5602158308029175, "rewards_train/1-l": -1.1651532649993896, "rewards_train/1-w": 3.4894096851348877, "rewards_train/2-2": 3.017124652862549, "rewards_train/2-w": 1.707027792930603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.654562950134277, "rewards_train/margins_1": 1.9291938543319702, "rewards_train/margins_2": 1.3100968599319458, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -215.625732421875, "logps_train/policy_1_l": -274.4881896972656, "logps_train/policy_1_w": -205.61685180664062, "logps_train/policy_2_2": -182.32748413085938, "logps_train/policy_2_w": -256.03826904296875, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -221.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.787428379058838, "rewards_train/1-l": -2.5503830909729004, "rewards_train/1-w": 3.7203474044799805, "rewards_train/2-2": 3.8610024452209473, "rewards_train/2-w": 1.8352336883544922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.270730495452881, "rewards_train/margins_1": 0.9329190254211426, "rewards_train/margins_2": 2.025768756866455, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -155.38262939453125, "logps_train/policy_1_l": -149.47442626953125, "logps_train/policy_1_w": -166.7022705078125, "logps_train/policy_2_2": -112.05697631835938, "logps_train/policy_2_w": -235.3731689453125, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.9398610591888428, "rewards_train/1-l": -1.3411918878555298, "rewards_train/1-w": 3.08563232421875, "rewards_train/2-2": 3.312466621398926, "rewards_train/2-w": 0.29393303394317627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.42682421207428, "rewards_train/margins_1": 1.1457712650299072, "rewards_train/margins_2": 3.0185335874557495, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -115.08001708984375, "logps_train/policy_1_l": -238.73715209960938, "logps_train/policy_1_w": -162.22860717773438, "logps_train/policy_2_2": -89.19799041748047, "logps_train/policy_2_w": -201.5589599609375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.956060767173767, "rewards_train/1-l": -2.5364110469818115, "rewards_train/1-w": 3.947843551635742, "rewards_train/2-2": 2.5286386013031006, "rewards_train/2-w": 2.2855100631713867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.484254598617554, "rewards_train/margins_1": 1.991782784461975, "rewards_train/margins_2": 0.24312853813171387, "step": 533 }, { "epoch": 1.6, "learning_rate": 5.432233860413172e-07, "loss": 0.4361, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -195.1162109375, "logps_train/policy_1_l": -257.6495361328125, "logps_train/policy_1_w": -181.9949951171875, "logps_train/policy_2_2": -154.30694580078125, "logps_train/policy_2_w": -232.28038024902344, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.2079086303710938, "rewards_train/1-l": -2.738389492034912, "rewards_train/1-w": 3.5958127975463867, "rewards_train/2-2": 3.821258068084717, "rewards_train/2-w": 1.9750885963439941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.334202289581299, "rewards_train/margins_1": 1.387904167175293, "rewards_train/margins_2": 1.8461694717407227, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -230.63394165039062, "logps_train/policy_1_l": -236.81605529785156, "logps_train/policy_1_w": -208.28945922851562, "logps_train/policy_2_2": -185.8465576171875, "logps_train/policy_2_w": -245.86346435546875, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -274.0, "rewards_train/1-2": 2.349886417388916, "rewards_train/1-l": -2.483755588531494, "rewards_train/1-w": 4.322225570678711, "rewards_train/2-2": 4.223937034606934, "rewards_train/2-w": 2.774200201034546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.805981159210205, "rewards_train/margins_1": 1.972339153289795, "rewards_train/margins_2": 1.4497368335723877, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -160.9842529296875, "logps_train/policy_1_l": -221.9659423828125, "logps_train/policy_1_w": -131.30084228515625, "logps_train/policy_2_2": -125.66093444824219, "logps_train/policy_2_w": -173.30006408691406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.760948896408081, "rewards_train/1-l": -2.7403452396392822, "rewards_train/1-w": 3.407416343688965, "rewards_train/2-2": 3.2839066982269287, "rewards_train/2-w": 2.001244068145752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.147761583328247, "rewards_train/margins_1": 1.6464674472808838, "rewards_train/margins_2": 1.2826626300811768, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -107.99172973632812, "logps_train/policy_1_l": -144.505859375, "logps_train/policy_1_w": -67.49980163574219, "logps_train/policy_2_2": -83.28419494628906, "logps_train/policy_2_w": -92.3580322265625, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 1.4016088247299194, "rewards_train/1-l": -2.3451175689697266, "rewards_train/1-w": 2.0285353660583496, "rewards_train/2-2": 2.3497049808502197, "rewards_train/2-w": 1.2415409088134766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.373652935028076, "rewards_train/margins_1": 0.6269265413284302, "rewards_train/margins_2": 1.1081640720367432, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -249.56031799316406, "logps_train/policy_1_l": -316.88330078125, "logps_train/policy_1_w": -171.81503295898438, "logps_train/policy_2_2": -199.45901489257812, "logps_train/policy_2_w": -217.4291229248047, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -278.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 1.8189678192138672, "rewards_train/1-l": -3.872706413269043, "rewards_train/1-w": 3.643496513366699, "rewards_train/2-2": 3.924410343170166, "rewards_train/2-w": 2.3133368492126465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.516202926635742, "rewards_train/margins_1": 1.824528694152832, "rewards_train/margins_2": 1.6110734939575195, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -118.61195373535156, "logps_train/policy_1_l": -110.11835479736328, "logps_train/policy_1_w": -112.43233489990234, "logps_train/policy_2_2": -83.70355224609375, "logps_train/policy_2_w": -138.47085571289062, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.104428768157959, "rewards_train/1-l": -1.6151559352874756, "rewards_train/1-w": 3.1247358322143555, "rewards_train/2-2": 2.47691011428833, "rewards_train/2-w": 2.124789237976074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.739891767501831, "rewards_train/margins_1": 2.0203070640563965, "rewards_train/margins_2": 0.35212087631225586, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -133.11737060546875, "logps_train/policy_1_l": -140.9851837158203, "logps_train/policy_1_w": -150.0876922607422, "logps_train/policy_2_2": -114.48289489746094, "logps_train/policy_2_w": -182.9835662841797, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.3196115493774414, "rewards_train/1-l": -1.882697582244873, "rewards_train/1-w": 4.4033403396606445, "rewards_train/2-2": 2.9680187702178955, "rewards_train/2-w": 2.6614089012145996, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.286037921905518, "rewards_train/margins_1": 2.083728790283203, "rewards_train/margins_2": 0.3066098690032959, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -174.85562133789062, "logps_train/policy_1_l": -153.30828857421875, "logps_train/policy_1_w": -163.65245056152344, "logps_train/policy_2_2": -146.49026489257812, "logps_train/policy_2_w": -199.87911987304688, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.3284997940063477, "rewards_train/1-l": -1.3480156660079956, "rewards_train/1-w": 3.9550673961639404, "rewards_train/2-2": 2.6814422607421875, "rewards_train/2-w": 2.2230260372161865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.303083062171936, "rewards_train/margins_1": 2.6265676021575928, "rewards_train/margins_2": 0.458416223526001, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -110.83183288574219, "logps_train/policy_1_l": -113.46511840820312, "logps_train/policy_1_w": -46.767173767089844, "logps_train/policy_2_2": -82.05682373046875, "logps_train/policy_2_w": -70.45486450195312, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 1.2504109144210815, "rewards_train/1-l": -3.03537917137146, "rewards_train/1-w": 2.232560396194458, "rewards_train/2-2": 2.5716614723205566, "rewards_train/2-w": 1.3857638835906982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.267939567565918, "rewards_train/margins_1": 0.9821494817733765, "rewards_train/margins_2": 1.1858975887298584, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -110.45822143554688, "logps_train/policy_1_l": -83.12398529052734, "logps_train/policy_1_w": -130.79473876953125, "logps_train/policy_2_2": -92.05059814453125, "logps_train/policy_2_w": -150.98809814453125, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.365505576133728, "rewards_train/1-l": -1.649019479751587, "rewards_train/1-w": 3.0354673862457275, "rewards_train/2-2": 2.3834171295166016, "rewards_train/2-w": 2.0734567642211914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6844868659973145, "rewards_train/margins_1": 1.6699618101119995, "rewards_train/margins_2": 0.30996036529541016, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -131.7731170654297, "logps_train/policy_1_l": -138.38815307617188, "logps_train/policy_1_w": -77.1995849609375, "logps_train/policy_2_2": -92.92695617675781, "logps_train/policy_2_w": -125.35775756835938, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.0492509603500366, "rewards_train/1-l": -2.0036587715148926, "rewards_train/1-w": 2.785510301589966, "rewards_train/2-2": 2.4916791915893555, "rewards_train/2-w": 1.2474275827407837, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.789169073104858, "rewards_train/margins_1": 1.7362593412399292, "rewards_train/margins_2": 1.2442516088485718, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -130.4158477783203, "logps_train/policy_1_l": -75.62117004394531, "logps_train/policy_1_w": -61.10084533691406, "logps_train/policy_2_2": -98.9864730834961, "logps_train/policy_2_w": -79.21426391601562, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -65.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -90.5, "rewards_train/1-2": 0.8693524599075317, "rewards_train/1-l": -1.090687870979309, "rewards_train/1-w": 1.8039227724075317, "rewards_train/2-2": 2.5185399055480957, "rewards_train/2-w": 1.141216516494751, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.894610643386841, "rewards_train/margins_1": 0.9345703125, "rewards_train/margins_2": 1.3773233890533447, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -106.86964416503906, "logps_train/policy_1_l": -103.5700912475586, "logps_train/policy_1_w": -100.55450439453125, "logps_train/policy_2_2": -78.23748779296875, "logps_train/policy_2_w": -146.8310546875, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -88.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.3081529140472412, "rewards_train/1-l": -1.583082914352417, "rewards_train/1-w": 2.789081573486328, "rewards_train/2-2": 2.281329393386841, "rewards_train/2-w": 0.7997057437896729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.372164487838745, "rewards_train/margins_1": 1.480928659439087, "rewards_train/margins_2": 1.481623649597168, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -207.04794311523438, "logps_train/policy_1_l": -228.07933044433594, "logps_train/policy_1_w": -196.7659912109375, "logps_train/policy_2_2": -155.7055206298828, "logps_train/policy_2_w": -267.6890869140625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -245.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.7620022296905518, "rewards_train/1-l": -2.141526460647583, "rewards_train/1-w": 4.78277587890625, "rewards_train/2-2": 3.395267963409424, "rewards_train/2-w": 1.9248392581939697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.924302339553833, "rewards_train/margins_1": 3.0207736492156982, "rewards_train/margins_2": 1.470428705215454, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -94.94093322753906, "logps_train/policy_1_l": -121.4052963256836, "logps_train/policy_1_w": -72.58782958984375, "logps_train/policy_2_2": -69.13469696044922, "logps_train/policy_2_w": -108.94654846191406, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.7398905754089355, "rewards_train/1-l": -1.571779727935791, "rewards_train/1-w": 2.4658262729644775, "rewards_train/2-2": 1.8294991254806519, "rewards_train/2-w": 0.9748764038085938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.0376060009002686, "rewards_train/margins_1": 1.725935697555542, "rewards_train/margins_2": 0.8546227216720581, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -141.35975646972656, "logps_train/policy_1_l": -163.69754028320312, "logps_train/policy_1_w": -122.71173095703125, "logps_train/policy_2_2": -107.42261505126953, "logps_train/policy_2_w": -167.06044006347656, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.1507433652877808, "rewards_train/1-l": -2.1292080879211426, "rewards_train/1-w": 3.0569510459899902, "rewards_train/2-2": 2.6360583305358887, "rewards_train/2-w": 1.0728625059127808, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.186159133911133, "rewards_train/margins_1": 1.9062076807022095, "rewards_train/margins_2": 1.563195824623108, "step": 535 }, { "epoch": 1.6, "learning_rate": 5.279474068256768e-07, "loss": 0.4289, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -179.04977416992188, "logps_train/policy_1_l": -155.438232421875, "logps_train/policy_1_w": -133.54449462890625, "logps_train/policy_2_2": -138.1280517578125, "logps_train/policy_2_w": -164.32144165039062, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 2.4700214862823486, "rewards_train/1-l": -2.0862059593200684, "rewards_train/1-w": 4.040180206298828, "rewards_train/2-2": 4.28719425201416, "rewards_train/2-w": 2.7500829696655273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1263861656188965, "rewards_train/margins_1": 1.5701587200164795, "rewards_train/margins_2": 1.5371112823486328, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -102.83041381835938, "logps_train/policy_1_l": -155.82855224609375, "logps_train/policy_1_w": -92.00221252441406, "logps_train/policy_2_2": -73.63600158691406, "logps_train/policy_2_w": -130.92471313476562, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.0380526781082153, "rewards_train/1-l": -2.4053168296813965, "rewards_train/1-w": 2.6802475452423096, "rewards_train/2-2": 1.9045636653900146, "rewards_train/2-w": 1.1376070976257324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.085564374923706, "rewards_train/margins_1": 1.6421948671340942, "rewards_train/margins_2": 0.7669565677642822, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -104.8735122680664, "logps_train/policy_1_l": -100.11067199707031, "logps_train/policy_1_w": -39.395294189453125, "logps_train/policy_2_2": -76.50627136230469, "logps_train/policy_2_w": -63.880393981933594, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -61.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 1.0407735109329224, "rewards_train/1-l": -1.6288405656814575, "rewards_train/1-w": 2.1514861583709717, "rewards_train/2-2": 2.3876547813415527, "rewards_train/2-w": 1.5125463008880615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.780326724052429, "rewards_train/margins_1": 1.1107126474380493, "rewards_train/margins_2": 0.8751084804534912, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -217.39068603515625, "logps_train/policy_1_l": -158.1315460205078, "logps_train/policy_1_w": -177.21673583984375, "logps_train/policy_2_2": -172.36126708984375, "logps_train/policy_2_w": -233.79721069335938, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.9234306812286377, "rewards_train/1-l": -1.5225296020507812, "rewards_train/1-w": 4.40332555770874, "rewards_train/2-2": 4.616998672485352, "rewards_train/2-w": 2.0827784538269043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.9258551597595215, "rewards_train/margins_1": 1.4798948764801025, "rewards_train/margins_2": 2.5342202186584473, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -204.72073364257812, "logps_train/policy_1_l": -303.23699951171875, "logps_train/policy_1_w": -202.45318603515625, "logps_train/policy_2_2": -162.73318481445312, "logps_train/policy_2_w": -260.70245361328125, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": 1.9052720069885254, "rewards_train/1-l": -2.770867109298706, "rewards_train/1-w": 4.25702428817749, "rewards_train/2-2": 3.2091031074523926, "rewards_train/2-w": 2.1246776580810547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.027891397476196, "rewards_train/margins_1": 2.351752281188965, "rewards_train/margins_2": 1.084425449371338, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -176.94210815429688, "logps_train/policy_1_l": -192.2489471435547, "logps_train/policy_1_w": -107.47734832763672, "logps_train/policy_2_2": -131.95782470703125, "logps_train/policy_2_w": -165.6482696533203, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.3464138507843018, "rewards_train/1-l": -2.350285530090332, "rewards_train/1-w": 3.1022655963897705, "rewards_train/2-2": 3.26124906539917, "rewards_train/2-w": 1.5257980823516846, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4525511264801025, "rewards_train/margins_1": 1.7558517456054688, "rewards_train/margins_2": 1.7354509830474854, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -104.16861724853516, "logps_train/policy_1_l": -60.920677185058594, "logps_train/policy_1_w": -79.5303726196289, "logps_train/policy_2_2": -71.52027130126953, "logps_train/policy_2_w": -114.75599670410156, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -50.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.071419596672058, "rewards_train/1-l": -1.076052188873291, "rewards_train/1-w": 2.8911032676696777, "rewards_train/2-2": 2.7401599884033203, "rewards_train/2-w": 1.6126818656921387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9671554565429688, "rewards_train/margins_1": 1.8196836709976196, "rewards_train/margins_2": 1.1274781227111816, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -145.35960388183594, "logps_train/policy_1_l": -116.64892578125, "logps_train/policy_1_w": -107.51318359375, "logps_train/policy_2_2": -117.6917495727539, "logps_train/policy_2_w": -134.5740509033203, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 2.5116963386535645, "rewards_train/1-l": -2.075634002685547, "rewards_train/1-w": 3.278125524520874, "rewards_train/2-2": 3.765591621398926, "rewards_train/2-w": 2.0420093536376953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.353759527206421, "rewards_train/margins_1": 0.7664291858673096, "rewards_train/margins_2": 1.7235822677612305, "step": 536 }, { "epoch": 1.61, "logps_train/policy_1_2": -188.064453125, "logps_train/policy_1_l": -123.20811462402344, "logps_train/policy_1_w": -94.8409423828125, "logps_train/policy_2_2": -139.37057495117188, "logps_train/policy_2_w": -137.61766052246094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 0.982227087020874, "rewards_train/1-l": -1.817490816116333, "rewards_train/1-w": 3.2537968158721924, "rewards_train/2-2": 3.0930697917938232, "rewards_train/2-w": 1.787062644958496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.071287631988525, "rewards_train/margins_1": 2.2715697288513184, "rewards_train/margins_2": 1.3060071468353271, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -138.3468017578125, "logps_train/policy_1_l": -105.7908706665039, "logps_train/policy_1_w": -148.5151824951172, "logps_train/policy_2_2": -101.66668701171875, "logps_train/policy_2_w": -189.4947052001953, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.2778204679489136, "rewards_train/1-l": -1.6806490421295166, "rewards_train/1-w": 3.1275832653045654, "rewards_train/2-2": 2.7223939895629883, "rewards_train/2-w": 1.1716225147247314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.808232307434082, "rewards_train/margins_1": 1.8497627973556519, "rewards_train/margins_2": 1.5507714748382568, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -119.643310546875, "logps_train/policy_1_l": -125.32234191894531, "logps_train/policy_1_w": -74.73319244384766, "logps_train/policy_2_2": -87.50341796875, "logps_train/policy_2_w": -104.01699829101562, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.2231694459915161, "rewards_train/1-l": -2.349421262741089, "rewards_train/1-w": 2.4571492671966553, "rewards_train/2-2": 2.837158203125, "rewards_train/2-w": 1.360018253326416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.806570529937744, "rewards_train/margins_1": 1.2339798212051392, "rewards_train/margins_2": 1.477139949798584, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -196.2360382080078, "logps_train/policy_1_l": -256.1976013183594, "logps_train/policy_1_w": -216.58029174804688, "logps_train/policy_2_2": -151.0109100341797, "logps_train/policy_2_w": -289.99615478515625, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -308.0, "rewards_train/1-2": 2.651395797729492, "rewards_train/1-l": -2.7916371822357178, "rewards_train/1-w": 5.041970252990723, "rewards_train/2-2": 4.030157566070557, "rewards_train/2-w": 1.8691351413726807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.83360743522644, "rewards_train/margins_1": 2.3905744552612305, "rewards_train/margins_2": 2.161022424697876, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -254.3635711669922, "logps_train/policy_1_l": -321.49530029296875, "logps_train/policy_1_w": -164.57046508789062, "logps_train/policy_2_2": -191.74429321289062, "logps_train/policy_2_w": -236.35464477539062, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -292.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 1.9980182647705078, "rewards_train/1-l": -3.0924999713897705, "rewards_train/1-w": 4.285141468048096, "rewards_train/2-2": 3.9411964416503906, "rewards_train/2-w": 1.923911452293396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.377641439437866, "rewards_train/margins_1": 2.287123203277588, "rewards_train/margins_2": 2.0172849893569946, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -131.5793914794922, "logps_train/policy_1_l": -181.000732421875, "logps_train/policy_1_w": -114.52175903320312, "logps_train/policy_2_2": -99.2977294921875, "logps_train/policy_2_w": -143.8897705078125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.382685661315918, "rewards_train/1-l": -1.4559340476989746, "rewards_train/1-w": 2.650949001312256, "rewards_train/2-2": 2.4913198947906494, "rewards_train/2-w": 1.2309436798095703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1068830490112305, "rewards_train/margins_1": 1.268263339996338, "rewards_train/margins_2": 1.260376214981079, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -145.65382385253906, "logps_train/policy_1_l": -150.42416381835938, "logps_train/policy_1_w": -84.42681884765625, "logps_train/policy_2_2": -112.8017578125, "logps_train/policy_2_w": -110.8531723022461, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.2092275619506836, "rewards_train/1-l": -2.2150726318359375, "rewards_train/1-w": 2.505168914794922, "rewards_train/2-2": 2.503417730331421, "rewards_train/2-w": 1.351987361907959, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.720241546630859, "rewards_train/margins_1": 1.2959413528442383, "rewards_train/margins_2": 1.151430368423462, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -272.6918029785156, "logps_train/policy_1_l": -236.68807983398438, "logps_train/policy_1_w": -149.080078125, "logps_train/policy_2_2": -196.96493530273438, "logps_train/policy_2_w": -205.69146728515625, "logps_train/ref_1_2": -288.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.4058198928833008, "rewards_train/1-l": -2.9719343185424805, "rewards_train/1-w": 4.245898246765137, "rewards_train/2-2": 4.678507328033447, "rewards_train/2-w": 2.1964778900146484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.217832565307617, "rewards_train/margins_1": 2.840078353881836, "rewards_train/margins_2": 2.482029438018799, "step": 537 }, { "epoch": 1.61, "learning_rate": 5.128638964666166e-07, "loss": 0.3361, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -153.5990447998047, "logps_train/policy_1_l": -157.75054931640625, "logps_train/policy_1_w": -106.79985046386719, "logps_train/policy_2_2": -128.94900512695312, "logps_train/policy_2_w": -133.30572509765625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 2.115877628326416, "rewards_train/1-l": -1.5461490154266357, "rewards_train/1-w": 2.755171537399292, "rewards_train/2-2": 3.051192283630371, "rewards_train/2-w": 2.1069276332855225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.301320552825928, "rewards_train/margins_1": 0.639293909072876, "rewards_train/margins_2": 0.9442646503448486, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -174.0037841796875, "logps_train/policy_1_l": -203.57798767089844, "logps_train/policy_1_w": -119.86966705322266, "logps_train/policy_2_2": -142.975341796875, "logps_train/policy_2_w": -153.7027587890625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.429308295249939, "rewards_train/1-l": -2.297642707824707, "rewards_train/1-w": 2.5443811416625977, "rewards_train/2-2": 2.8587164878845215, "rewards_train/2-w": 1.7023801803588867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.842023849487305, "rewards_train/margins_1": 1.1150728464126587, "rewards_train/margins_2": 1.1563363075256348, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -114.97535705566406, "logps_train/policy_1_l": -100.12478637695312, "logps_train/policy_1_w": -124.60238647460938, "logps_train/policy_2_2": -94.049560546875, "logps_train/policy_2_w": -156.6048583984375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.2688705921173096, "rewards_train/1-l": -1.7115023136138916, "rewards_train/1-w": 3.8381993770599365, "rewards_train/2-2": 3.157153844833374, "rewards_train/2-w": 2.1645150184631348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.549701690673828, "rewards_train/margins_1": 1.569328784942627, "rewards_train/margins_2": 0.9926388263702393, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -141.4222412109375, "logps_train/policy_1_l": -121.66802978515625, "logps_train/policy_1_w": -89.65118408203125, "logps_train/policy_2_2": -101.567138671875, "logps_train/policy_2_w": -124.98120880126953, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.2359020709991455, "rewards_train/1-l": -1.1910216808319092, "rewards_train/1-w": 3.44035005569458, "rewards_train/2-2": 3.0483648777008057, "rewards_train/2-w": 2.1393790245056152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.631371736526489, "rewards_train/margins_1": 2.2044479846954346, "rewards_train/margins_2": 0.9089858531951904, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -247.3958740234375, "logps_train/policy_1_l": -141.20809936523438, "logps_train/policy_1_w": -157.70663452148438, "logps_train/policy_2_2": -184.52371215820312, "logps_train/policy_2_w": -196.56198120117188, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 0.7201786041259766, "rewards_train/1-l": -1.3077235221862793, "rewards_train/1-w": 3.2805089950561523, "rewards_train/2-2": 3.917747974395752, "rewards_train/2-w": 1.847513198852539, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.588232517242432, "rewards_train/margins_1": 2.560330390930176, "rewards_train/margins_2": 2.070234775543213, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -168.73294067382812, "logps_train/policy_1_l": -192.78201293945312, "logps_train/policy_1_w": -100.98536682128906, "logps_train/policy_2_2": -120.53663635253906, "logps_train/policy_2_w": -135.26683044433594, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.4735815525054932, "rewards_train/1-l": -2.3160905838012695, "rewards_train/1-w": 2.998338460922241, "rewards_train/2-2": 3.336961269378662, "rewards_train/2-w": 2.096754550933838, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.314429044723511, "rewards_train/margins_1": 1.524756908416748, "rewards_train/margins_2": 1.2402067184448242, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -143.554931640625, "logps_train/policy_1_l": -230.771484375, "logps_train/policy_1_w": -104.40106201171875, "logps_train/policy_2_2": -104.42739868164062, "logps_train/policy_2_w": -152.54237365722656, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 0.9601325988769531, "rewards_train/1-l": -2.1830062866210938, "rewards_train/1-w": 2.756767988204956, "rewards_train/2-2": 2.258431911468506, "rewards_train/2-w": 1.4363880157470703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.93977427482605, "rewards_train/margins_1": 1.796635389328003, "rewards_train/margins_2": 0.8220438957214355, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -86.11375427246094, "logps_train/policy_1_l": -129.18190002441406, "logps_train/policy_1_w": -80.58059692382812, "logps_train/policy_2_2": -66.09133911132812, "logps_train/policy_2_w": -111.17501068115234, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.6933115720748901, "rewards_train/1-l": -1.9655520915985107, "rewards_train/1-w": 2.657564640045166, "rewards_train/2-2": 1.774264931678772, "rewards_train/2-w": 1.391873836517334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.623116731643677, "rewards_train/margins_1": 1.9642530679702759, "rewards_train/margins_2": 0.382391095161438, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -132.12020874023438, "logps_train/policy_1_l": -155.60446166992188, "logps_train/policy_1_w": -96.1291732788086, "logps_train/policy_2_2": -92.69158935546875, "logps_train/policy_2_w": -144.885986328125, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.069229006767273, "rewards_train/1-l": -1.8838832378387451, "rewards_train/1-w": 2.9394264221191406, "rewards_train/2-2": 2.427716016769409, "rewards_train/2-w": 1.0035879611968994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.823309659957886, "rewards_train/margins_1": 1.8701974153518677, "rewards_train/margins_2": 1.4241280555725098, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -167.83741760253906, "logps_train/policy_1_l": -179.07798767089844, "logps_train/policy_1_w": -162.43670654296875, "logps_train/policy_2_2": -128.490966796875, "logps_train/policy_2_w": -208.67315673828125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.8271949291229248, "rewards_train/1-l": -2.535923480987549, "rewards_train/1-w": 3.511408805847168, "rewards_train/2-2": 3.178245782852173, "rewards_train/2-w": 2.0912787914276123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.047332286834717, "rewards_train/margins_1": 1.6842138767242432, "rewards_train/margins_2": 1.0869669914245605, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -42.64979553222656, "logps_train/policy_1_l": -72.63973999023438, "logps_train/policy_1_w": -69.09700012207031, "logps_train/policy_2_2": -28.988500595092773, "logps_train/policy_2_w": -94.01329040527344, "logps_train/ref_1_2": -47.75, "logps_train/ref_1_l": -55.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -38.5, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.5065045356750488, "rewards_train/1-l": -1.728036642074585, "rewards_train/1-w": 1.715006709098816, "rewards_train/2-2": 0.9657983183860779, "rewards_train/2-w": 0.26136642694473267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.443043351173401, "rewards_train/margins_1": 1.208502173423767, "rewards_train/margins_2": 0.7044318914413452, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -134.5128631591797, "logps_train/policy_1_l": -152.230712890625, "logps_train/policy_1_w": -77.10503387451172, "logps_train/policy_2_2": -111.06295776367188, "logps_train/policy_2_w": -96.53404235839844, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.511603832244873, "rewards_train/1-l": -2.1011979579925537, "rewards_train/1-w": 2.6660590171813965, "rewards_train/2-2": 2.3538601398468018, "rewards_train/2-w": 1.8965952396392822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.76725697517395, "rewards_train/margins_1": 1.1544551849365234, "rewards_train/margins_2": 0.45726490020751953, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -228.28265380859375, "logps_train/policy_1_l": -279.200439453125, "logps_train/policy_1_w": -141.72935485839844, "logps_train/policy_2_2": -174.91925048828125, "logps_train/policy_2_w": -183.9869384765625, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7904844284057617, "rewards_train/1-l": -4.049342632293701, "rewards_train/1-w": 3.0098767280578613, "rewards_train/2-2": 4.076826095581055, "rewards_train/2-w": 1.9778687953948975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.0592193603515625, "rewards_train/margins_1": 1.2193922996520996, "rewards_train/margins_2": 2.0989573001861572, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -178.9975128173828, "logps_train/policy_1_l": -202.5240478515625, "logps_train/policy_1_w": -166.64219665527344, "logps_train/policy_2_2": -131.71971130371094, "logps_train/policy_2_w": -236.95907592773438, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.5283747911453247, "rewards_train/1-l": -1.0762338638305664, "rewards_train/1-w": 3.910780429840088, "rewards_train/2-2": 3.149904251098633, "rewards_train/2-w": 1.27909255027771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.987014293670654, "rewards_train/margins_1": 2.382405638694763, "rewards_train/margins_2": 1.8708117008209229, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -148.58529663085938, "logps_train/policy_1_l": -154.23736572265625, "logps_train/policy_1_w": -100.09246826171875, "logps_train/policy_2_2": -119.117431640625, "logps_train/policy_2_w": -131.24659729003906, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.0777994394302368, "rewards_train/1-l": -2.601470947265625, "rewards_train/1-w": 3.5720036029815674, "rewards_train/2-2": 2.628295421600342, "rewards_train/2-w": 1.9565906524658203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.173474550247192, "rewards_train/margins_1": 2.4942041635513306, "rewards_train/margins_2": 0.6717047691345215, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -77.2998046875, "logps_train/policy_1_l": -117.42013549804688, "logps_train/policy_1_w": -55.54202651977539, "logps_train/policy_2_2": -48.2232551574707, "logps_train/policy_2_w": -91.55133056640625, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 0.5595704317092896, "rewards_train/1-l": -2.2995827198028564, "rewards_train/1-w": 2.0110316276550293, "rewards_train/2-2": 1.7010630369186401, "rewards_train/2-w": 0.6050236225128174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.310614347457886, "rewards_train/margins_1": 1.4514611959457397, "rewards_train/margins_2": 1.0960394144058228, "step": 539 }, { "epoch": 1.62, "learning_rate": 4.97974327088156e-07, "loss": 0.4138, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -109.06307983398438, "logps_train/policy_1_l": -170.98910522460938, "logps_train/policy_1_w": -143.45993041992188, "logps_train/policy_2_2": -84.94058227539062, "logps_train/policy_2_w": -178.56814575195312, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.7749428749084473, "rewards_train/1-l": -2.969028949737549, "rewards_train/1-w": 4.223537445068359, "rewards_train/2-2": 2.747738838195801, "rewards_train/2-w": 2.8228719234466553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.192566394805908, "rewards_train/margins_1": 2.448594570159912, "rewards_train/margins_2": -0.07513308525085449, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -164.6484375, "logps_train/policy_1_l": -200.72036743164062, "logps_train/policy_1_w": -159.50009155273438, "logps_train/policy_2_2": -133.1641082763672, "logps_train/policy_2_w": -211.45822143554688, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.650780439376831, "rewards_train/1-l": -2.2370758056640625, "rewards_train/1-w": 4.526553153991699, "rewards_train/2-2": 2.9855427742004395, "rewards_train/2-w": 2.0045690536499023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.763628959655762, "rewards_train/margins_1": 2.875772714614868, "rewards_train/margins_2": 0.9809737205505371, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -109.17897033691406, "logps_train/policy_1_l": -115.91331481933594, "logps_train/policy_1_w": -97.07164764404297, "logps_train/policy_2_2": -86.46141052246094, "logps_train/policy_2_w": -123.203857421875, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9977281093597412, "rewards_train/1-l": -1.5528545379638672, "rewards_train/1-w": 2.1252574920654297, "rewards_train/2-2": 1.97026526927948, "rewards_train/2-w": 0.8471919894218445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.678112030029297, "rewards_train/margins_1": 1.1275293827056885, "rewards_train/margins_2": 1.1230732798576355, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -127.95633697509766, "logps_train/policy_1_l": -81.43276977539062, "logps_train/policy_1_w": -99.19986724853516, "logps_train/policy_2_2": -98.42252349853516, "logps_train/policy_2_w": -125.78863525390625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -70.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.2059285640716553, "rewards_train/1-l": -1.10636305809021, "rewards_train/1-w": 2.499544143676758, "rewards_train/2-2": 2.5936851501464844, "rewards_train/2-w": 1.114885926246643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6059072017669678, "rewards_train/margins_1": 1.2936155796051025, "rewards_train/margins_2": 1.4787992238998413, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -131.87124633789062, "logps_train/policy_1_l": -167.44696044921875, "logps_train/policy_1_w": -120.00369262695312, "logps_train/policy_2_2": -101.05874633789062, "logps_train/policy_2_w": -154.4339599609375, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.518735408782959, "rewards_train/1-l": -2.0620248317718506, "rewards_train/1-w": 3.3479278087615967, "rewards_train/2-2": 2.9617035388946533, "rewards_train/2-w": 1.676720142364502, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.409952640533447, "rewards_train/margins_1": 1.8291923999786377, "rewards_train/margins_2": 1.2849833965301514, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -74.76161193847656, "logps_train/policy_1_l": -108.33882141113281, "logps_train/policy_1_w": -90.11497497558594, "logps_train/policy_2_2": -53.551353454589844, "logps_train/policy_2_w": -104.83984375, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.4425890445709229, "rewards_train/1-l": -2.0897417068481445, "rewards_train/1-w": 2.351783514022827, "rewards_train/2-2": 2.1972086429595947, "rewards_train/2-w": 1.573828101158142, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.441525220870972, "rewards_train/margins_1": 0.9091944694519043, "rewards_train/margins_2": 0.6233805418014526, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -184.57748413085938, "logps_train/policy_1_l": -223.02586364746094, "logps_train/policy_1_w": -134.94944763183594, "logps_train/policy_2_2": -139.68460083007812, "logps_train/policy_2_w": -181.75865173339844, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.767252802848816, "rewards_train/1-l": -2.0580549240112305, "rewards_train/1-w": 3.348804473876953, "rewards_train/2-2": 3.2971649169921875, "rewards_train/2-w": 1.9014782905578613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.406859397888184, "rewards_train/margins_1": 1.5815516710281372, "rewards_train/margins_2": 1.3956866264343262, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -138.2020721435547, "logps_train/policy_1_l": -165.13319396972656, "logps_train/policy_1_w": -112.44212341308594, "logps_train/policy_2_2": -119.27032470703125, "logps_train/policy_2_w": -136.1373291015625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 2.1485419273376465, "rewards_train/1-l": -1.833827257156372, "rewards_train/1-w": 3.135085105895996, "rewards_train/2-2": 2.7842957973480225, "rewards_train/2-w": 2.2571654319763184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.968912363052368, "rewards_train/margins_1": 0.9865431785583496, "rewards_train/margins_2": 0.5271303653717041, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -156.60470581054688, "logps_train/policy_1_l": -112.96331024169922, "logps_train/policy_1_w": -65.95374298095703, "logps_train/policy_2_2": -124.57176971435547, "logps_train/policy_2_w": -90.9739990234375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.3645305633544922, "rewards_train/1-l": -2.1256282329559326, "rewards_train/1-w": 1.9745478630065918, "rewards_train/2-2": 2.945948600769043, "rewards_train/2-w": 0.9752566814422607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.100176095962524, "rewards_train/margins_1": 0.6100172996520996, "rewards_train/margins_2": 1.9706919193267822, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -104.76217651367188, "logps_train/policy_1_l": -88.30044555664062, "logps_train/policy_1_w": -55.542030334472656, "logps_train/policy_2_2": -62.71992874145508, "logps_train/policy_2_w": -96.31300354003906, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 0.500735342502594, "rewards_train/1-l": -2.0073888301849365, "rewards_train/1-w": 2.446578025817871, "rewards_train/2-2": 2.267850875854492, "rewards_train/2-w": 0.86948162317276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.453966856002808, "rewards_train/margins_1": 1.945842683315277, "rewards_train/margins_2": 1.3983692526817322, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -129.0096435546875, "logps_train/policy_1_l": -138.53164672851562, "logps_train/policy_1_w": -148.49874877929688, "logps_train/policy_2_2": -103.28306579589844, "logps_train/policy_2_w": -184.99349975585938, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.902160882949829, "rewards_train/1-l": -1.3822674751281738, "rewards_train/1-w": 2.9782495498657227, "rewards_train/2-2": 2.6271626949310303, "rewards_train/2-w": 1.6365876197814941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3605170249938965, "rewards_train/margins_1": 1.0760886669158936, "rewards_train/margins_2": 0.9905750751495361, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -168.9281768798828, "logps_train/policy_1_l": -123.37136840820312, "logps_train/policy_1_w": -97.39178466796875, "logps_train/policy_2_2": -140.0926513671875, "logps_train/policy_2_w": -127.25682067871094, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.3024945259094238, "rewards_train/1-l": -1.4073032140731812, "rewards_train/1-w": 3.397246837615967, "rewards_train/2-2": 2.537609100341797, "rewards_train/2-w": 1.8019554615020752, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.804550051689148, "rewards_train/margins_1": 2.094752311706543, "rewards_train/margins_2": 0.7356536388397217, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -141.59877014160156, "logps_train/policy_1_l": -109.36647033691406, "logps_train/policy_1_w": -78.20753479003906, "logps_train/policy_2_2": -115.99893188476562, "logps_train/policy_2_w": -101.37876892089844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 1.801451325416565, "rewards_train/1-l": -1.898169994354248, "rewards_train/1-w": 3.173191547393799, "rewards_train/2-2": 2.6915130615234375, "rewards_train/2-w": 2.030677556991577, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.071361541748047, "rewards_train/margins_1": 1.3717402219772339, "rewards_train/margins_2": 0.6608355045318604, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -137.90525817871094, "logps_train/policy_1_l": -127.62290954589844, "logps_train/policy_1_w": -71.38192749023438, "logps_train/policy_2_2": -91.36842346191406, "logps_train/policy_2_w": -96.75096893310547, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 0.9469737410545349, "rewards_train/1-l": -1.7599477767944336, "rewards_train/1-w": 2.189931869506836, "rewards_train/2-2": 2.520969867706299, "rewards_train/2-w": 1.4647464752197266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9498796463012695, "rewards_train/margins_1": 1.242958128452301, "rewards_train/margins_2": 1.0562233924865723, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -86.70575714111328, "logps_train/policy_1_l": -116.02200317382812, "logps_train/policy_1_w": -104.68205261230469, "logps_train/policy_2_2": -69.60346221923828, "logps_train/policy_2_w": -122.20132446289062, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.6985652446746826, "rewards_train/1-l": -2.110403060913086, "rewards_train/1-w": 1.9023027420043945, "rewards_train/2-2": 2.384575843811035, "rewards_train/2-w": 1.1007654666900635, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0127058029174805, "rewards_train/margins_1": 0.20373749732971191, "rewards_train/margins_2": 1.2838103771209717, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -122.9666976928711, "logps_train/policy_1_l": -128.02784729003906, "logps_train/policy_1_w": -104.30621337890625, "logps_train/policy_2_2": -97.64080047607422, "logps_train/policy_2_w": -124.60145568847656, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.9408305883407593, "rewards_train/1-l": -1.5902843475341797, "rewards_train/1-w": 2.9475038051605225, "rewards_train/2-2": 3.0546698570251465, "rewards_train/2-w": 2.1226675510406494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.537788152694702, "rewards_train/margins_1": 1.0066732168197632, "rewards_train/margins_2": 0.9320023059844971, "step": 541 }, { "epoch": 1.62, "learning_rate": 4.832801518860175e-07, "loss": 0.4894, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -128.83810424804688, "logps_train/policy_1_l": -143.8774871826172, "logps_train/policy_1_w": -115.306640625, "logps_train/policy_2_2": -105.82504272460938, "logps_train/policy_2_w": -131.79776000976562, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 2.1880645751953125, "rewards_train/1-l": -1.5986864566802979, "rewards_train/1-w": 3.3208978176116943, "rewards_train/2-2": 2.898745536804199, "rewards_train/2-w": 2.8202242851257324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.919584274291992, "rewards_train/margins_1": 1.1328332424163818, "rewards_train/margins_2": 0.0785212516784668, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -160.86453247070312, "logps_train/policy_1_l": -157.894287109375, "logps_train/policy_1_w": -107.4495849609375, "logps_train/policy_2_2": -137.3740234375, "logps_train/policy_2_w": -132.38775634765625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.8541717529296875, "rewards_train/1-l": -1.6102296113967896, "rewards_train/1-w": 2.4011354446411133, "rewards_train/2-2": 2.7602531909942627, "rewards_train/2-w": 1.278411865234375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.011365056037903, "rewards_train/margins_1": 0.5469636917114258, "rewards_train/margins_2": 1.4818413257598877, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -121.90643310546875, "logps_train/policy_1_l": -116.73890686035156, "logps_train/policy_1_w": -99.12466430664062, "logps_train/policy_2_2": -91.0517807006836, "logps_train/policy_2_w": -130.13055419921875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.9999808073043823, "rewards_train/1-l": -1.8852189779281616, "rewards_train/1-w": 2.6547207832336426, "rewards_train/2-2": 2.312009334564209, "rewards_train/2-w": 1.3338204622268677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.539939761161804, "rewards_train/margins_1": 1.6547399759292603, "rewards_train/margins_2": 0.9781888723373413, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -143.40182495117188, "logps_train/policy_1_l": -143.95021057128906, "logps_train/policy_1_w": -99.3877944946289, "logps_train/policy_2_2": -108.81935119628906, "logps_train/policy_2_w": -131.810302734375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.112942099571228, "rewards_train/1-l": -1.718457579612732, "rewards_train/1-w": 2.0625877380371094, "rewards_train/2-2": 2.293065309524536, "rewards_train/2-w": 0.9338128566741943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7810453176498413, "rewards_train/margins_1": 0.9496456384658813, "rewards_train/margins_2": 1.3592524528503418, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -138.5448760986328, "logps_train/policy_1_l": -243.37762451171875, "logps_train/policy_1_w": -245.31475830078125, "logps_train/policy_2_2": -110.60677337646484, "logps_train/policy_2_w": -284.3306884765625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -274.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -300.0, "rewards_train/1-2": 2.192777633666992, "rewards_train/1-l": -2.3736987113952637, "rewards_train/1-w": 2.8716483116149902, "rewards_train/2-2": 3.073697805404663, "rewards_train/2-w": 1.5169320106506348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.245347023010254, "rewards_train/margins_1": 0.678870677947998, "rewards_train/margins_2": 1.5567657947540283, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -203.10443115234375, "logps_train/policy_1_l": -187.06039428710938, "logps_train/policy_1_w": -173.17343139648438, "logps_train/policy_2_2": -155.02655029296875, "logps_train/policy_2_w": -212.1377410888672, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.9895576238632202, "rewards_train/1-l": -1.7710075378417969, "rewards_train/1-w": 3.2830469608306885, "rewards_train/2-2": 3.503593921661377, "rewards_train/2-w": 2.1416945457458496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.054054498672485, "rewards_train/margins_1": 1.2934893369674683, "rewards_train/margins_2": 1.3618993759155273, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -109.3076400756836, "logps_train/policy_1_l": -56.450897216796875, "logps_train/policy_1_w": -88.74935150146484, "logps_train/policy_2_2": -76.8462142944336, "logps_train/policy_2_w": -111.42828369140625, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -49.75, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.853611171245575, "rewards_train/1-l": -0.6877411603927612, "rewards_train/1-w": 1.7070958614349365, "rewards_train/2-2": 2.1478004455566406, "rewards_train/2-w": 0.6673280000686646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.3948370218276978, "rewards_train/margins_1": 0.8534846901893616, "rewards_train/margins_2": 1.480472445487976, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -218.959716796875, "logps_train/policy_1_l": -143.88906860351562, "logps_train/policy_1_w": -204.42709350585938, "logps_train/policy_2_2": -174.24452209472656, "logps_train/policy_2_w": -249.44537353515625, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.7993409633636475, "rewards_train/1-l": -1.2471108436584473, "rewards_train/1-w": 3.1385409832000732, "rewards_train/2-2": 3.536484718322754, "rewards_train/2-w": 1.0960884094238281, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.3856518268585205, "rewards_train/margins_1": 1.3392000198364258, "rewards_train/margins_2": 2.440396308898926, "step": 542 }, { "epoch": 1.63, "logps_train/policy_1_2": -169.12953186035156, "logps_train/policy_1_l": -159.3674774169922, "logps_train/policy_1_w": -79.8687515258789, "logps_train/policy_2_2": -128.55592346191406, "logps_train/policy_2_w": -110.13209533691406, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.8511085510253906, "rewards_train/1-l": -1.8226858377456665, "rewards_train/1-w": 2.4881248474121094, "rewards_train/2-2": 2.7637429237365723, "rewards_train/2-w": 1.3196032047271729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.310810685157776, "rewards_train/margins_1": 1.6370162963867188, "rewards_train/margins_2": 1.4441397190093994, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -191.35789489746094, "logps_train/policy_1_l": -180.444091796875, "logps_train/policy_1_w": -200.22398376464844, "logps_train/policy_2_2": -163.2449493408203, "logps_train/policy_2_w": -239.15838623046875, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.557960033416748, "rewards_train/1-l": -1.444408893585205, "rewards_train/1-w": 3.6182265281677246, "rewards_train/2-2": 3.7567553520202637, "rewards_train/2-w": 2.027911901473999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.06263542175293, "rewards_train/margins_1": 1.0602664947509766, "rewards_train/margins_2": 1.7288434505462646, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -166.5164337158203, "logps_train/policy_1_l": -195.2896728515625, "logps_train/policy_1_w": -200.05587768554688, "logps_train/policy_2_2": -140.1077117919922, "logps_train/policy_2_w": -243.92282104492188, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -240.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 2.5921058654785156, "rewards_train/1-l": -1.5258424282073975, "rewards_train/1-w": 3.9826929569244385, "rewards_train/2-2": 3.6548547744750977, "rewards_train/2-w": 1.990531086921692, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.508535385131836, "rewards_train/margins_1": 1.3905870914459229, "rewards_train/margins_2": 1.6643236875534058, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -70.85760498046875, "logps_train/policy_1_l": -82.92221069335938, "logps_train/policy_1_w": -60.418670654296875, "logps_train/policy_2_2": -50.47887420654297, "logps_train/policy_2_w": -87.50382995605469, "logps_train/ref_1_2": -79.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -68.5, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.8220516443252563, "rewards_train/1-l": -1.0148777961730957, "rewards_train/1-w": 2.2154576778411865, "rewards_train/2-2": 1.7958627939224243, "rewards_train/2-w": 1.1082103252410889, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2303354740142822, "rewards_train/margins_1": 1.3934060335159302, "rewards_train/margins_2": 0.6876524686813354, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -65.2161636352539, "logps_train/policy_1_l": -62.907596588134766, "logps_train/policy_1_w": -43.00772476196289, "logps_train/policy_2_2": -47.53875732421875, "logps_train/policy_2_w": -57.75368118286133, "logps_train/ref_1_2": -78.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -59.25, "logps_train/ref_2_2": -68.5, "logps_train/ref_2_w": -67.0, "rewards_train/1-2": 1.3283843994140625, "rewards_train/1-l": -1.0813846588134766, "rewards_train/1-w": 1.6378995180130005, "rewards_train/2-2": 2.1273744106292725, "rewards_train/2-w": 0.8886944651603699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.719284176826477, "rewards_train/margins_1": 0.309515118598938, "rewards_train/margins_2": 1.2386799454689026, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -73.52633666992188, "logps_train/policy_1_l": -59.209808349609375, "logps_train/policy_1_w": -44.53728103637695, "logps_train/policy_2_2": -51.90166473388672, "logps_train/policy_2_w": -65.23414611816406, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -48.0, "logps_train/ref_1_w": -63.0, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -76.0, "rewards_train/1-2": 0.849597156047821, "rewards_train/1-l": -1.1149171590805054, "rewards_train/1-w": 1.8509594202041626, "rewards_train/2-2": 1.8439581394195557, "rewards_train/2-w": 1.0767806768417358, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.965876579284668, "rewards_train/margins_1": 1.0013622641563416, "rewards_train/margins_2": 0.7671774625778198, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -150.24234008789062, "logps_train/policy_1_l": -179.96923828125, "logps_train/policy_1_w": -148.79144287109375, "logps_train/policy_2_2": -122.39286041259766, "logps_train/policy_2_w": -188.5968475341797, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.4558441638946533, "rewards_train/1-l": -2.0887203216552734, "rewards_train/1-w": 3.25288724899292, "rewards_train/2-2": 2.600557804107666, "rewards_train/2-w": 1.3879717588424683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.341607570648193, "rewards_train/margins_1": 1.7970430850982666, "rewards_train/margins_2": 1.2125860452651978, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -181.7463836669922, "logps_train/policy_1_l": -154.34609985351562, "logps_train/policy_1_w": -90.67538452148438, "logps_train/policy_2_2": -144.7476348876953, "logps_train/policy_2_w": -114.59828186035156, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 2.6925487518310547, "rewards_train/1-l": -1.872499704360962, "rewards_train/1-w": 3.6207432746887207, "rewards_train/2-2": 4.037736892700195, "rewards_train/2-w": 2.5714211463928223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.493242979049683, "rewards_train/margins_1": 0.928194522857666, "rewards_train/margins_2": 1.466315746307373, "step": 543 }, { "epoch": 1.63, "learning_rate": 4.6878280498579674e-07, "loss": 0.4796, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -192.4593505859375, "logps_train/policy_1_l": -159.40066528320312, "logps_train/policy_1_w": -107.05307006835938, "logps_train/policy_2_2": -151.21437072753906, "logps_train/policy_2_w": -145.6978302001953, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.5446906089782715, "rewards_train/1-l": -1.6244417428970337, "rewards_train/1-w": 3.033365488052368, "rewards_train/2-2": 3.1598129272460938, "rewards_train/2-w": 1.9309983253479004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.657807230949402, "rewards_train/margins_1": 1.4886748790740967, "rewards_train/margins_2": 1.2288146018981934, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -203.65228271484375, "logps_train/policy_1_l": -142.23336791992188, "logps_train/policy_1_w": -136.32095336914062, "logps_train/policy_2_2": -150.26364135742188, "logps_train/policy_2_w": -183.228271484375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2847727537155151, "rewards_train/1-l": -2.1906228065490723, "rewards_train/1-w": 3.7694666385650635, "rewards_train/2-2": 3.965822696685791, "rewards_train/2-w": 1.9474855661392212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.960089445114136, "rewards_train/margins_1": 2.4846938848495483, "rewards_train/margins_2": 2.01833713054657, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -62.95248031616211, "logps_train/policy_1_l": -119.06005859375, "logps_train/policy_1_w": -68.82649230957031, "logps_train/policy_2_2": -41.70838165283203, "logps_train/policy_2_w": -108.0421142578125, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -62.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 1.2953767776489258, "rewards_train/1-l": -1.8224124908447266, "rewards_train/1-w": 2.876236915588379, "rewards_train/2-2": 2.0261590480804443, "rewards_train/2-w": 1.0676645040512085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6986494064331055, "rewards_train/margins_1": 1.5808601379394531, "rewards_train/margins_2": 0.9584945440292358, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -112.91090393066406, "logps_train/policy_1_l": -144.14590454101562, "logps_train/policy_1_w": -118.14160919189453, "logps_train/policy_2_2": -83.53015899658203, "logps_train/policy_2_w": -166.4778289794922, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 1.7214105129241943, "rewards_train/1-l": -2.539590358734131, "rewards_train/1-w": 3.8811521530151367, "rewards_train/2-2": 3.0251095294952393, "rewards_train/2-w": 1.6600298881530762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.420742511749268, "rewards_train/margins_1": 2.1597416400909424, "rewards_train/margins_2": 1.365079641342163, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -118.56233978271484, "logps_train/policy_1_l": -149.39654541015625, "logps_train/policy_1_w": -91.77500915527344, "logps_train/policy_2_2": -87.51535034179688, "logps_train/policy_2_w": -124.2105712890625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.7433754205703735, "rewards_train/1-l": -2.053523063659668, "rewards_train/1-w": 2.5496468544006348, "rewards_train/2-2": 1.929519772529602, "rewards_train/2-w": 1.1765992641448975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.603169918060303, "rewards_train/margins_1": 1.8062714338302612, "rewards_train/margins_2": 0.7529205083847046, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -249.635009765625, "logps_train/policy_1_l": -246.79449462890625, "logps_train/policy_1_w": -191.27774047851562, "logps_train/policy_2_2": -189.11984252929688, "logps_train/policy_2_w": -237.35641479492188, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -236.0, "logps_train/ref_2_2": -233.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.098999500274658, "rewards_train/1-l": -3.4341366291046143, "rewards_train/1-w": 4.389413833618164, "rewards_train/2-2": 4.438015937805176, "rewards_train/2-w": 1.8268592357635498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.823550462722778, "rewards_train/margins_1": 2.290414333343506, "rewards_train/margins_2": 2.611156702041626, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -161.5963592529297, "logps_train/policy_1_l": -88.32713317871094, "logps_train/policy_1_w": -105.6415786743164, "logps_train/policy_2_2": -125.1763916015625, "logps_train/policy_2_w": -134.39486694335938, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.4583332538604736, "rewards_train/1-l": -1.1284160614013672, "rewards_train/1-w": 3.4161643981933594, "rewards_train/2-2": 3.1692748069763184, "rewards_train/2-w": 1.8895913362503052, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.544580459594727, "rewards_train/margins_1": 1.9578311443328857, "rewards_train/margins_2": 1.2796834707260132, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -132.06248474121094, "logps_train/policy_1_l": -144.75650024414062, "logps_train/policy_1_w": -79.79061126708984, "logps_train/policy_2_2": -102.32937622070312, "logps_train/policy_2_w": -97.9760513305664, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 1.0437523126602173, "rewards_train/1-l": -2.4807281494140625, "rewards_train/1-w": 1.7795329093933105, "rewards_train/2-2": 2.2865943908691406, "rewards_train/2-w": 1.002394199371338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.260261058807373, "rewards_train/margins_1": 0.7357805967330933, "rewards_train/margins_2": 1.2842001914978027, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -158.0294189453125, "logps_train/policy_1_l": -247.9497833251953, "logps_train/policy_1_w": -134.46180725097656, "logps_train/policy_2_2": -112.99391174316406, "logps_train/policy_2_w": -191.4703826904297, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.5826053619384766, "rewards_train/1-l": -3.1074771881103516, "rewards_train/1-w": 3.5960073471069336, "rewards_train/2-2": 3.062718391418457, "rewards_train/2-w": 2.221712589263916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.703484535217285, "rewards_train/margins_1": 2.013401985168457, "rewards_train/margins_2": 0.841005802154541, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -163.5924835205078, "logps_train/policy_1_l": -163.60398864746094, "logps_train/policy_1_w": -171.73654174804688, "logps_train/policy_2_2": -121.56253051757812, "logps_train/policy_2_w": -221.30642700195312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.6391887664794922, "rewards_train/1-l": -2.168113946914673, "rewards_train/1-w": 4.051346778869629, "rewards_train/2-2": 3.2312467098236084, "rewards_train/2-w": 1.606858491897583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.219460725784302, "rewards_train/margins_1": 2.4121580123901367, "rewards_train/margins_2": 1.6243882179260254, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -194.860107421875, "logps_train/policy_1_l": -153.5467529296875, "logps_train/policy_1_w": -115.47317504882812, "logps_train/policy_2_2": -161.6722869873047, "logps_train/policy_2_w": -156.86737060546875, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.9046142101287842, "rewards_train/1-l": -2.475770950317383, "rewards_train/1-w": 3.21557354927063, "rewards_train/2-2": 3.374567985534668, "rewards_train/2-w": 1.6466617584228516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.691344499588013, "rewards_train/margins_1": 1.3109593391418457, "rewards_train/margins_2": 1.7279062271118164, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -97.25261688232422, "logps_train/policy_1_l": -71.37454986572266, "logps_train/policy_1_w": -63.91591262817383, "logps_train/policy_2_2": -62.24186706542969, "logps_train/policy_2_w": -93.28050231933594, "logps_train/ref_1_2": -102.5, "logps_train/ref_1_l": -58.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.527863085269928, "rewards_train/1-l": -1.3437047004699707, "rewards_train/1-w": 1.9347758293151855, "rewards_train/2-2": 1.9476885795593262, "rewards_train/2-w": 0.5553479194641113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2784805297851562, "rewards_train/margins_1": 1.4069127440452576, "rewards_train/margins_2": 1.3923406600952148, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -226.7255096435547, "logps_train/policy_1_l": -263.0119934082031, "logps_train/policy_1_w": -256.00799560546875, "logps_train/policy_2_2": -165.40628051757812, "logps_train/policy_2_w": -332.9366455078125, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -304.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -340.0, "rewards_train/1-2": 1.489948034286499, "rewards_train/1-l": -4.35745096206665, "rewards_train/1-w": 4.7616987228393555, "rewards_train/2-2": 4.296872138977051, "rewards_train/2-w": 0.8422720432281494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 9.119149684906006, "rewards_train/margins_1": 3.2717506885528564, "rewards_train/margins_2": 3.4546000957489014, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -136.4765625, "logps_train/policy_1_l": -242.2080535888672, "logps_train/policy_1_w": -52.7982292175293, "logps_train/policy_2_2": -103.53790283203125, "logps_train/policy_2_w": -74.99433135986328, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -69.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 0.8890631198883057, "rewards_train/1-l": -3.8879942893981934, "rewards_train/1-w": 1.6964465379714966, "rewards_train/2-2": 2.554413318634033, "rewards_train/2-w": 0.9976369738578796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.58444082736969, "rewards_train/margins_1": 0.8073834180831909, "rewards_train/margins_2": 1.5567763447761536, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -126.037841796875, "logps_train/policy_1_l": -167.91131591796875, "logps_train/policy_1_w": -103.88591003417969, "logps_train/policy_2_2": -76.21961975097656, "logps_train/policy_2_w": -161.66146850585938, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.8446528911590576, "rewards_train/1-l": -2.438593626022339, "rewards_train/1-w": 3.529670238494873, "rewards_train/2-2": 2.7967875003814697, "rewards_train/2-w": 1.6418616771697998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.968263864517212, "rewards_train/margins_1": 2.6850173473358154, "rewards_train/margins_2": 1.15492582321167, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -166.67147827148438, "logps_train/policy_1_l": -150.398681640625, "logps_train/policy_1_w": -77.60382080078125, "logps_train/policy_2_2": -124.567138671875, "logps_train/policy_2_w": -95.04624938964844, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.4719158411026001, "rewards_train/1-l": -1.6394785642623901, "rewards_train/1-w": 2.2513368129730225, "rewards_train/2-2": 2.637817144393921, "rewards_train/2-w": 1.4527966976165771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8908153772354126, "rewards_train/margins_1": 1.7794209718704224, "rewards_train/margins_2": 1.1850204467773438, "step": 545 }, { "epoch": 1.63, "learning_rate": 4.54483701302994e-07, "loss": 0.4154, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -183.2451629638672, "logps_train/policy_1_l": -172.02035522460938, "logps_train/policy_1_w": -166.02191162109375, "logps_train/policy_2_2": -142.0950927734375, "logps_train/policy_2_w": -234.61053466796875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.0567336082458496, "rewards_train/1-l": -2.1979339122772217, "rewards_train/1-w": 4.435310363769531, "rewards_train/2-2": 3.41158390045166, "rewards_train/2-w": 1.7108216285705566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.633244276046753, "rewards_train/margins_1": 2.3785767555236816, "rewards_train/margins_2": 1.7007622718811035, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -193.5474395751953, "logps_train/policy_1_l": -226.14678955078125, "logps_train/policy_1_w": -190.09112548828125, "logps_train/policy_2_2": -146.89450073242188, "logps_train/policy_2_w": -238.43460083007812, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.5714282989501953, "rewards_train/1-l": -2.5084285736083984, "rewards_train/1-w": 3.758075475692749, "rewards_train/2-2": 3.504298686981201, "rewards_train/2-w": 2.022165298461914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.2665040493011475, "rewards_train/margins_1": 2.1866471767425537, "rewards_train/margins_2": 1.482133388519287, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -166.98545837402344, "logps_train/policy_1_l": -156.73268127441406, "logps_train/policy_1_w": -80.50861358642578, "logps_train/policy_2_2": -121.49674987792969, "logps_train/policy_2_w": -109.58222961425781, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": 1.3037980794906616, "rewards_train/1-l": -2.2693612575531006, "rewards_train/1-w": 2.113982677459717, "rewards_train/2-2": 3.1612625122070312, "rewards_train/2-w": 0.9652140140533447, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.383343935012817, "rewards_train/margins_1": 0.8101845979690552, "rewards_train/margins_2": 2.1960484981536865, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -119.07246398925781, "logps_train/policy_1_l": -135.52015686035156, "logps_train/policy_1_w": -69.66118621826172, "logps_train/policy_2_2": -99.99069213867188, "logps_train/policy_2_w": -86.09539794921875, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -103.5, "rewards_train/1-2": 1.4318162202835083, "rewards_train/1-l": -2.0641252994537354, "rewards_train/1-w": 2.0932564735412598, "rewards_train/2-2": 2.481595516204834, "rewards_train/2-w": 1.7303040027618408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.157381772994995, "rewards_train/margins_1": 0.6614402532577515, "rewards_train/margins_2": 0.7512915134429932, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -92.09626770019531, "logps_train/policy_1_l": -138.4383087158203, "logps_train/policy_1_w": -90.16215515136719, "logps_train/policy_2_2": -72.95267486572266, "logps_train/policy_2_w": -117.22273254394531, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.2716237306594849, "rewards_train/1-l": -1.7087726593017578, "rewards_train/1-w": 2.119136095046997, "rewards_train/2-2": 1.6453580856323242, "rewards_train/2-w": 1.4246013164520264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.827908754348755, "rewards_train/margins_1": 0.8475123643875122, "rewards_train/margins_2": 0.22075676918029785, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -145.1712188720703, "logps_train/policy_1_l": -165.39207458496094, "logps_train/policy_1_w": -87.66795349121094, "logps_train/policy_2_2": -107.35589599609375, "logps_train/policy_2_w": -133.46371459960938, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.8828778266906738, "rewards_train/1-l": -3.2118639945983887, "rewards_train/1-w": 2.8761730194091797, "rewards_train/2-2": 3.3737854957580566, "rewards_train/2-w": 1.7153465747833252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.088037014007568, "rewards_train/margins_1": 0.9932951927185059, "rewards_train/margins_2": 1.6584389209747314, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -148.67074584960938, "logps_train/policy_1_l": -110.77322387695312, "logps_train/policy_1_w": -89.49769592285156, "logps_train/policy_2_2": -116.22335052490234, "logps_train/policy_2_w": -119.85192108154297, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.2672998905181885, "rewards_train/1-l": -1.1171659231185913, "rewards_train/1-w": 2.603355884552002, "rewards_train/2-2": 2.452664852142334, "rewards_train/2-w": 1.5148074626922607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7205218076705933, "rewards_train/margins_1": 1.3360559940338135, "rewards_train/margins_2": 0.9378573894500732, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -235.07748413085938, "logps_train/policy_1_l": -246.02203369140625, "logps_train/policy_1_w": -125.81201171875, "logps_train/policy_2_2": -189.99954223632812, "logps_train/policy_2_w": -156.52386474609375, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.9360020160675049, "rewards_train/1-l": -2.42173433303833, "rewards_train/1-w": 3.3383302688598633, "rewards_train/2-2": 3.8172338008880615, "rewards_train/2-w": 2.4765195846557617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.760064601898193, "rewards_train/margins_1": 1.4023282527923584, "rewards_train/margins_2": 1.3407142162322998, "step": 546 }, { "epoch": 1.64, "logps_train/policy_1_2": -145.63644409179688, "logps_train/policy_1_l": -152.43362426757812, "logps_train/policy_1_w": -117.19017791748047, "logps_train/policy_2_2": -106.1117935180664, "logps_train/policy_2_w": -153.22869873046875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.9426053166389465, "rewards_train/1-l": -2.6195335388183594, "rewards_train/1-w": 3.1567635536193848, "rewards_train/2-2": 2.8997583389282227, "rewards_train/2-w": 1.5552542209625244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.776297092437744, "rewards_train/margins_1": 2.2141582369804382, "rewards_train/margins_2": 1.3445041179656982, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -175.0005645751953, "logps_train/policy_1_l": -145.69638061523438, "logps_train/policy_1_w": -140.3031005859375, "logps_train/policy_2_2": -125.75741577148438, "logps_train/policy_2_w": -184.10877990722656, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.2186933755874634, "rewards_train/1-l": -1.3348736763000488, "rewards_train/1-w": 3.7985968589782715, "rewards_train/2-2": 3.0805087089538574, "rewards_train/2-w": 1.5594340562820435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.13347053527832, "rewards_train/margins_1": 2.579903483390808, "rewards_train/margins_2": 1.521074652671814, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -127.54795837402344, "logps_train/policy_1_l": -120.54280090332031, "logps_train/policy_1_w": -114.49539947509766, "logps_train/policy_2_2": -102.17816162109375, "logps_train/policy_2_w": -143.06204223632812, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.8764547109603882, "rewards_train/1-l": -1.4253257513046265, "rewards_train/1-w": 2.5938196182250977, "rewards_train/2-2": 2.7353086471557617, "rewards_train/2-w": 1.3898894786834717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.019145369529724, "rewards_train/margins_1": 0.7173649072647095, "rewards_train/margins_2": 1.34541916847229, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -110.80181121826172, "logps_train/policy_1_l": -173.86630249023438, "logps_train/policy_1_w": -95.73091888427734, "logps_train/policy_2_2": -80.23837280273438, "logps_train/policy_2_w": -127.68253326416016, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.510443925857544, "rewards_train/1-l": -1.6516695022583008, "rewards_train/1-w": 2.6519081592559814, "rewards_train/2-2": 2.576162576675415, "rewards_train/2-w": 1.3786221742630005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.303577661514282, "rewards_train/margins_1": 1.1414642333984375, "rewards_train/margins_2": 1.1975404024124146, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -148.79351806640625, "logps_train/policy_1_l": -365.0869140625, "logps_train/policy_1_w": -147.72340393066406, "logps_train/policy_2_2": -106.09086608886719, "logps_train/policy_2_w": -195.17515563964844, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -326.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.4718198776245117, "rewards_train/1-l": -3.8750596046447754, "rewards_train/1-w": 2.883909225463867, "rewards_train/2-2": 2.951850175857544, "rewards_train/2-w": 1.2887346744537354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.758968830108643, "rewards_train/margins_1": 1.4120893478393555, "rewards_train/margins_2": 1.6631155014038086, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -127.87423706054688, "logps_train/policy_1_l": -165.22329711914062, "logps_train/policy_1_w": -113.13884735107422, "logps_train/policy_2_2": -104.98939514160156, "logps_train/policy_2_w": -128.5243377685547, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.517263650894165, "rewards_train/1-l": -2.2790684700012207, "rewards_train/1-w": 2.954376220703125, "rewards_train/2-2": 2.716881275177002, "rewards_train/2-w": 2.3268632888793945, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.233444690704346, "rewards_train/margins_1": 1.43711256980896, "rewards_train/margins_2": 0.3900179862976074, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -165.21484375, "logps_train/policy_1_l": -170.5940399169922, "logps_train/policy_1_w": -155.23873901367188, "logps_train/policy_2_2": -125.18013000488281, "logps_train/policy_2_w": -204.53965759277344, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.1394526958465576, "rewards_train/1-l": -2.031670093536377, "rewards_train/1-w": 3.757376194000244, "rewards_train/2-2": 2.5054244995117188, "rewards_train/2-w": 1.7991588115692139, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.789046287536621, "rewards_train/margins_1": 2.6179234981536865, "rewards_train/margins_2": 0.7062656879425049, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -112.02449798583984, "logps_train/policy_1_l": -81.85867309570312, "logps_train/policy_1_w": -52.91957092285156, "logps_train/policy_2_2": -84.74922180175781, "logps_train/policy_2_w": -75.15377044677734, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -61.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": 1.1491124629974365, "rewards_train/1-l": -2.0582308769226074, "rewards_train/1-w": 2.333043098449707, "rewards_train/2-2": 2.5057411193847656, "rewards_train/2-w": 1.421342134475708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.3912739753723145, "rewards_train/margins_1": 1.1839306354522705, "rewards_train/margins_2": 1.0843989849090576, "step": 547 }, { "epoch": 1.64, "learning_rate": 4.40384236404921e-07, "loss": 0.4185, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -214.78939819335938, "logps_train/policy_1_l": -227.86038208007812, "logps_train/policy_1_w": -174.54293823242188, "logps_train/policy_2_2": -174.80596923828125, "logps_train/policy_2_w": -224.73602294921875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.6237945556640625, "rewards_train/1-l": -3.9293971061706543, "rewards_train/1-w": 3.9277379512786865, "rewards_train/2-2": 3.697528600692749, "rewards_train/2-w": 1.3709285259246826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.857135057449341, "rewards_train/margins_1": 2.303943395614624, "rewards_train/margins_2": 2.3266000747680664, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -146.99708557128906, "logps_train/policy_1_l": -137.44866943359375, "logps_train/policy_1_w": -95.37998962402344, "logps_train/policy_2_2": -124.07328796386719, "logps_train/policy_2_w": -114.73622131347656, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.9823224544525146, "rewards_train/1-l": -1.3626891374588013, "rewards_train/1-w": 2.4580464363098145, "rewards_train/2-2": 3.1051716804504395, "rewards_train/2-w": 1.8391709327697754, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8207355737686157, "rewards_train/margins_1": 0.4757239818572998, "rewards_train/margins_2": 1.266000747680664, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -77.248046875, "logps_train/policy_1_l": -82.39274597167969, "logps_train/policy_1_w": -58.752647399902344, "logps_train/policy_2_2": -54.2340087890625, "logps_train/policy_2_w": -74.7402572631836, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 1.3767578601837158, "rewards_train/1-l": -1.5573902130126953, "rewards_train/1-w": 2.073026418685913, "rewards_train/2-2": 2.4254274368286133, "rewards_train/2-w": 1.364255666732788, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6304166316986084, "rewards_train/margins_1": 0.6962685585021973, "rewards_train/margins_2": 1.0611717700958252, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -204.07762145996094, "logps_train/policy_1_l": -208.01126098632812, "logps_train/policy_1_w": -155.9451141357422, "logps_train/policy_2_2": -159.07591247558594, "logps_train/policy_2_w": -210.59219360351562, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.3094251155853271, "rewards_train/1-l": -3.2698757648468018, "rewards_train/1-w": 4.028925895690918, "rewards_train/2-2": 3.3345959186553955, "rewards_train/2-w": 2.0204696655273438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.29880166053772, "rewards_train/margins_1": 2.719500780105591, "rewards_train/margins_2": 1.3141262531280518, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -203.3498077392578, "logps_train/policy_1_l": -199.07818603515625, "logps_train/policy_1_w": -136.57005310058594, "logps_train/policy_2_2": -156.33230590820312, "logps_train/policy_2_w": -186.60313415527344, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.083770275115967, "rewards_train/1-l": -2.4609429836273193, "rewards_train/1-w": 4.074244499206543, "rewards_train/2-2": 3.9542691707611084, "rewards_train/2-w": 1.7646865844726562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.535187482833862, "rewards_train/margins_1": 1.9904742240905762, "rewards_train/margins_2": 2.189582586288452, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -186.84686279296875, "logps_train/policy_1_l": -181.08547973632812, "logps_train/policy_1_w": -184.03271484375, "logps_train/policy_2_2": -146.3291778564453, "logps_train/policy_2_w": -253.36978149414062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.6778128147125244, "rewards_train/1-l": -2.047244071960449, "rewards_train/1-w": 3.968601942062378, "rewards_train/2-2": 4.063956260681152, "rewards_train/2-w": 1.613022804260254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.015846014022827, "rewards_train/margins_1": 1.2907891273498535, "rewards_train/margins_2": 2.4509334564208984, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -126.842529296875, "logps_train/policy_1_l": -165.5086212158203, "logps_train/policy_1_w": -138.22097778320312, "logps_train/policy_2_2": -95.72187042236328, "logps_train/policy_2_w": -173.9759521484375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.1610597372055054, "rewards_train/1-l": -2.071272373199463, "rewards_train/1-w": 3.2075908184051514, "rewards_train/2-2": 2.4942193031311035, "rewards_train/2-w": 1.532092571258545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.278863191604614, "rewards_train/margins_1": 2.046531081199646, "rewards_train/margins_2": 0.9621267318725586, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -155.49777221679688, "logps_train/policy_1_l": -123.86920166015625, "logps_train/policy_1_w": -157.00633239746094, "logps_train/policy_2_2": -124.16899871826172, "logps_train/policy_2_w": -189.93331909179688, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7939729690551758, "rewards_train/1-l": -1.7769596576690674, "rewards_train/1-w": 3.555616855621338, "rewards_train/2-2": 3.2362256050109863, "rewards_train/2-w": 2.094169855117798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.332576513290405, "rewards_train/margins_1": 1.761643886566162, "rewards_train/margins_2": 1.1420557498931885, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -159.8958740234375, "logps_train/policy_1_l": -190.86471557617188, "logps_train/policy_1_w": -129.12319946289062, "logps_train/policy_2_2": -123.6187515258789, "logps_train/policy_2_w": -165.6288299560547, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.770568609237671, "rewards_train/1-l": -2.6905736923217773, "rewards_train/1-w": 3.0158045291900635, "rewards_train/2-2": 2.984999656677246, "rewards_train/2-w": 2.0246171951293945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.706378221511841, "rewards_train/margins_1": 1.2452359199523926, "rewards_train/margins_2": 0.9603824615478516, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -212.02606201171875, "logps_train/policy_1_l": -169.11184692382812, "logps_train/policy_1_w": -139.29849243164062, "logps_train/policy_2_2": -177.72023010253906, "logps_train/policy_2_w": -172.13076782226562, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.2825498580932617, "rewards_train/1-l": -1.7119648456573486, "rewards_train/1-w": 2.628744125366211, "rewards_train/2-2": 2.6162586212158203, "rewards_train/2-w": 1.299422264099121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.34070897102356, "rewards_train/margins_1": 1.3461942672729492, "rewards_train/margins_2": 1.3168363571166992, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -166.14111328125, "logps_train/policy_1_l": -166.52822875976562, "logps_train/policy_1_w": -92.23683166503906, "logps_train/policy_2_2": -127.125244140625, "logps_train/policy_2_w": -135.24484252929688, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.8866689801216125, "rewards_train/1-l": -2.401750087738037, "rewards_train/1-w": 2.590184211730957, "rewards_train/2-2": 2.6054441928863525, "rewards_train/2-w": 1.1012966632843018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.991934299468994, "rewards_train/margins_1": 1.7035152316093445, "rewards_train/margins_2": 1.5041475296020508, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -144.7537078857422, "logps_train/policy_1_l": -136.3358154296875, "logps_train/policy_1_w": -118.07320404052734, "logps_train/policy_2_2": -111.00950622558594, "logps_train/policy_2_w": -148.719482421875, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.441817045211792, "rewards_train/1-l": -1.66912841796875, "rewards_train/1-w": 2.819242000579834, "rewards_train/2-2": 2.859205722808838, "rewards_train/2-w": 2.0303962230682373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.488370418548584, "rewards_train/margins_1": 1.377424955368042, "rewards_train/margins_2": 0.8288094997406006, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -181.88812255859375, "logps_train/policy_1_l": -274.0022277832031, "logps_train/policy_1_w": -132.66847229003906, "logps_train/policy_2_2": -125.46612548828125, "logps_train/policy_2_w": -185.28125, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -247.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.5611872673034668, "rewards_train/1-l": -2.7517852783203125, "rewards_train/1-w": 3.3526840209960938, "rewards_train/2-2": 2.925262928009033, "rewards_train/2-w": 1.4843742847442627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.104469299316406, "rewards_train/margins_1": 1.791496753692627, "rewards_train/margins_2": 1.4408886432647705, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -96.90390014648438, "logps_train/policy_1_l": -89.9940185546875, "logps_train/policy_1_w": -58.8332633972168, "logps_train/policy_2_2": -72.49574279785156, "logps_train/policy_2_w": -76.0456314086914, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.8918360471725464, "rewards_train/1-l": -1.4724490642547607, "rewards_train/1-w": 1.5780020952224731, "rewards_train/2-2": 1.647202968597412, "rewards_train/2-w": 1.1641871929168701, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.050451159477234, "rewards_train/margins_1": 0.6861660480499268, "rewards_train/margins_2": 0.483015775680542, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -194.0033721923828, "logps_train/policy_1_l": -180.3592529296875, "logps_train/policy_1_w": -111.51896667480469, "logps_train/policy_2_2": -163.02511596679688, "logps_train/policy_2_w": -135.722412109375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.5965385437011719, "rewards_train/1-l": -2.2964725494384766, "rewards_train/1-w": 2.532087564468384, "rewards_train/2-2": 2.5568647384643555, "rewards_train/2-w": 1.7752199172973633, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.82856011390686, "rewards_train/margins_1": 0.9355490207672119, "rewards_train/margins_2": 0.7816448211669922, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -146.97695922851562, "logps_train/policy_1_l": -170.7696990966797, "logps_train/policy_1_w": -151.5588836669922, "logps_train/policy_2_2": -110.62907409667969, "logps_train/policy_2_w": -198.39663696289062, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.0257415771484375, "rewards_train/1-l": -1.7707200050354004, "rewards_train/1-w": 3.4472365379333496, "rewards_train/2-2": 3.3980300426483154, "rewards_train/2-w": 1.2540867328643799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.21795654296875, "rewards_train/margins_1": 1.421494960784912, "rewards_train/margins_2": 2.1439433097839355, "step": 549 }, { "epoch": 1.65, "learning_rate": 4.264857863744956e-07, "loss": 0.3579, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -155.27584838867188, "logps_train/policy_1_l": -157.39215087890625, "logps_train/policy_1_w": -82.13374328613281, "logps_train/policy_2_2": -114.98291015625, "logps_train/policy_2_w": -107.45530700683594, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.7036652565002441, "rewards_train/1-l": -1.258746862411499, "rewards_train/1-w": 2.8522510528564453, "rewards_train/2-2": 2.9345221519470215, "rewards_train/2-w": 1.8505629301071167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.110997915267944, "rewards_train/margins_1": 1.1485857963562012, "rewards_train/margins_2": 1.0839592218399048, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -164.52456665039062, "logps_train/policy_1_l": -181.34719848632812, "logps_train/policy_1_w": -108.40311431884766, "logps_train/policy_2_2": -128.42156982421875, "logps_train/policy_2_w": -144.617919921875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.8225445747375488, "rewards_train/1-l": -1.6558144092559814, "rewards_train/1-w": 3.35636830329895, "rewards_train/2-2": 3.1047189235687256, "rewards_train/2-w": 1.9010989665985107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.012182712554932, "rewards_train/margins_1": 1.5338237285614014, "rewards_train/margins_2": 1.2036199569702148, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -191.62548828125, "logps_train/policy_1_l": -150.96527099609375, "logps_train/policy_1_w": -153.12442016601562, "logps_train/policy_2_2": -145.0041046142578, "logps_train/policy_2_w": -205.8687744140625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 0.9026854634284973, "rewards_train/1-l": -3.002289056777954, "rewards_train/1-w": 3.6352133750915527, "rewards_train/2-2": 2.7116987705230713, "rewards_train/2-w": 1.706871747970581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.637502431869507, "rewards_train/margins_1": 2.7325279116630554, "rewards_train/margins_2": 1.0048270225524902, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -159.72271728515625, "logps_train/policy_1_l": -131.111328125, "logps_train/policy_1_w": -97.01396942138672, "logps_train/policy_2_2": -116.43014526367188, "logps_train/policy_2_w": -127.40019226074219, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.7371017932891846, "rewards_train/1-l": -2.0607423782348633, "rewards_train/1-w": 2.5814151763916016, "rewards_train/2-2": 2.810891628265381, "rewards_train/2-w": 1.2672069072723389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.642157554626465, "rewards_train/margins_1": 1.844313383102417, "rewards_train/margins_2": 1.543684720993042, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -112.89453125, "logps_train/policy_1_l": -143.99960327148438, "logps_train/policy_1_w": -62.60350036621094, "logps_train/policy_2_2": -88.46989440917969, "logps_train/policy_2_w": -84.74626922607422, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": 1.5480468273162842, "rewards_train/1-l": -3.0530858039855957, "rewards_train/1-w": 2.7349627017974854, "rewards_train/2-2": 2.5655112266540527, "rewards_train/2-w": 1.7113105058670044, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.788048505783081, "rewards_train/margins_1": 1.1869158744812012, "rewards_train/margins_2": 0.8542007207870483, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -106.28024291992188, "logps_train/policy_1_l": -140.42681884765625, "logps_train/policy_1_w": -68.8389663696289, "logps_train/policy_2_2": -86.9548568725586, "logps_train/policy_2_w": -80.04731750488281, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": 1.6860384941101074, "rewards_train/1-l": -1.5840883255004883, "rewards_train/1-w": 1.8793842792510986, "rewards_train/2-2": 2.344357967376709, "rewards_train/2-w": 1.4265186786651611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.463472604751587, "rewards_train/margins_1": 0.1933457851409912, "rewards_train/margins_2": 0.9178392887115479, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -133.915283203125, "logps_train/policy_1_l": -148.27488708496094, "logps_train/policy_1_w": -106.65225219726562, "logps_train/policy_2_2": -96.3346176147461, "logps_train/policy_2_w": -140.05792236328125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.1740972995758057, "rewards_train/1-l": -3.4462387561798096, "rewards_train/1-w": 2.3523526191711426, "rewards_train/2-2": 2.489975929260254, "rewards_train/2-w": 0.7848328351974487, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.798591375350952, "rewards_train/margins_1": 1.178255319595337, "rewards_train/margins_2": 1.7051430940628052, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -189.9278564453125, "logps_train/policy_1_l": -178.07579040527344, "logps_train/policy_1_w": -135.10018920898438, "logps_train/policy_2_2": -141.59423828125, "logps_train/policy_2_w": -198.02896118164062, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.8763556480407715, "rewards_train/1-l": -1.9060161113739014, "rewards_train/1-w": 3.597795009613037, "rewards_train/2-2": 3.545653820037842, "rewards_train/2-w": 0.7230805158615112, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5038111209869385, "rewards_train/margins_1": 1.7214393615722656, "rewards_train/margins_2": 2.8225733041763306, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -168.33694458007812, "logps_train/policy_1_l": -132.86685180664062, "logps_train/policy_1_w": -85.5924301147461, "logps_train/policy_2_2": -128.289794921875, "logps_train/policy_2_w": -114.0439224243164, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.1944315433502197, "rewards_train/1-l": -1.2601219415664673, "rewards_train/1-w": 3.0704448223114014, "rewards_train/2-2": 2.964770793914795, "rewards_train/2-w": 2.083108425140381, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.330566763877869, "rewards_train/margins_1": 1.8760132789611816, "rewards_train/margins_2": 0.8816623687744141, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -232.24496459960938, "logps_train/policy_1_l": -206.4581756591797, "logps_train/policy_1_w": -155.65692138671875, "logps_train/policy_2_2": -196.35504150390625, "logps_train/policy_2_w": -195.15377807617188, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.2255027294158936, "rewards_train/1-l": -2.534684896469116, "rewards_train/1-w": 3.4448561668395996, "rewards_train/2-2": 3.037933588027954, "rewards_train/2-w": 1.9768093824386597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.979541063308716, "rewards_train/margins_1": 2.219353437423706, "rewards_train/margins_2": 1.0611242055892944, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -148.22776794433594, "logps_train/policy_1_l": -137.92678833007812, "logps_train/policy_1_w": -63.481021881103516, "logps_train/policy_2_2": -119.55448913574219, "logps_train/policy_2_w": -90.7043685913086, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 1.370972990989685, "rewards_train/1-l": -2.264552593231201, "rewards_train/1-w": 1.7815852165222168, "rewards_train/2-2": 2.3284378051757812, "rewards_train/2-w": 0.9658915996551514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.046137809753418, "rewards_train/margins_1": 0.41061222553253174, "rewards_train/margins_2": 1.3625462055206299, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -196.77394104003906, "logps_train/policy_1_l": -210.597900390625, "logps_train/policy_1_w": -149.70794677734375, "logps_train/policy_2_2": -164.22825622558594, "logps_train/policy_2_w": -182.454345703125, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.4257302284240723, "rewards_train/1-l": -2.4629158973693848, "rewards_train/1-w": 3.3557677268981934, "rewards_train/2-2": 3.8302993774414062, "rewards_train/2-w": 2.1545639038085938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.818683624267578, "rewards_train/margins_1": 0.9300374984741211, "rewards_train/margins_2": 1.6757354736328125, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -189.13058471679688, "logps_train/policy_1_l": -147.7275848388672, "logps_train/policy_1_w": -65.10426330566406, "logps_train/policy_2_2": -129.66439819335938, "logps_train/policy_2_w": -97.61876678466797, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 0.8150668144226074, "rewards_train/1-l": -2.4821338653564453, "rewards_train/1-w": 2.631761074066162, "rewards_train/2-2": 3.0335593223571777, "rewards_train/2-w": 1.4568731784820557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.113894939422607, "rewards_train/margins_1": 1.8166942596435547, "rewards_train/margins_2": 1.576686143875122, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -76.08712768554688, "logps_train/policy_1_l": -96.88639831542969, "logps_train/policy_1_w": -100.3299560546875, "logps_train/policy_2_2": -60.55290985107422, "logps_train/policy_2_w": -140.76292419433594, "logps_train/ref_1_2": -88.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.1901156902313232, "rewards_train/1-l": -1.9847337007522583, "rewards_train/1-w": 2.71309757232666, "rewards_train/2-2": 1.815803050994873, "rewards_train/2-w": 1.1518326997756958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.6978312730789185, "rewards_train/margins_1": 1.522981882095337, "rewards_train/margins_2": 0.6639703512191772, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -129.2437286376953, "logps_train/policy_1_l": -72.43637084960938, "logps_train/policy_1_w": -77.15717315673828, "logps_train/policy_2_2": -109.22300720214844, "logps_train/policy_2_w": -106.7945556640625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.605313777923584, "rewards_train/1-l": -1.166146993637085, "rewards_train/1-w": 3.0151424407958984, "rewards_train/2-2": 2.3901991844177246, "rewards_train/2-w": 1.9627315998077393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.181289434432983, "rewards_train/margins_1": 1.4098286628723145, "rewards_train/margins_2": 0.42746758460998535, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -113.95945739746094, "logps_train/policy_1_l": -208.84841918945312, "logps_train/policy_1_w": -88.15194702148438, "logps_train/policy_2_2": -86.13609313964844, "logps_train/policy_2_w": -107.94654846191406, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.6368658542633057, "rewards_train/1-l": -3.382108688354492, "rewards_train/1-w": 2.643643379211426, "rewards_train/2-2": 2.586390972137451, "rewards_train/2-w": 1.4579821825027466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.025752067565918, "rewards_train/margins_1": 1.0067775249481201, "rewards_train/margins_2": 1.1284087896347046, "step": 551 }, { "epoch": 1.65, "learning_rate": 4.127897076759399e-07, "loss": 0.4298, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -98.9645767211914, "logps_train/policy_1_l": -138.76028442382812, "logps_train/policy_1_w": -79.23436737060547, "logps_train/policy_2_2": -71.3263168334961, "logps_train/policy_2_w": -118.8387680053711, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.7759054899215698, "rewards_train/1-l": -2.6147003173828125, "rewards_train/1-w": 2.4804697036743164, "rewards_train/2-2": 1.6925632953643799, "rewards_train/2-w": 1.0348734855651855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.095170021057129, "rewards_train/margins_1": 1.7045642137527466, "rewards_train/margins_2": 0.6576898097991943, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -132.28018188476562, "logps_train/policy_1_l": -144.15879821777344, "logps_train/policy_1_w": -111.78694915771484, "logps_train/policy_2_2": -101.91568756103516, "logps_train/policy_2_w": -142.79888916015625, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 2.182919502258301, "rewards_train/1-l": -1.9299428462982178, "rewards_train/1-w": 2.840054988861084, "rewards_train/2-2": 3.1178061962127686, "rewards_train/2-w": 1.9412055015563965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.769997835159302, "rewards_train/margins_1": 0.6571354866027832, "rewards_train/margins_2": 1.176600694656372, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -146.08999633789062, "logps_train/policy_1_l": -121.48439025878906, "logps_train/policy_1_w": -113.53121185302734, "logps_train/policy_2_2": -110.81317138671875, "logps_train/policy_2_w": -158.38088989257812, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 1.9019378423690796, "rewards_train/1-l": -1.6847670078277588, "rewards_train/1-w": 3.1648473739624023, "rewards_train/2-2": 3.3233699798583984, "rewards_train/2-w": 1.4349578619003296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.849614381790161, "rewards_train/margins_1": 1.2629095315933228, "rewards_train/margins_2": 1.8884121179580688, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -206.65072631835938, "logps_train/policy_1_l": -219.79351806640625, "logps_train/policy_1_w": -160.95797729492188, "logps_train/policy_2_2": -176.62155151367188, "logps_train/policy_2_w": -191.92745971679688, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.5989906787872314, "rewards_train/1-l": -2.5154871940612793, "rewards_train/1-w": 3.494826078414917, "rewards_train/2-2": 3.8690946102142334, "rewards_train/2-w": 2.440065860748291, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.010313272476196, "rewards_train/margins_1": 0.8958353996276855, "rewards_train/margins_2": 1.4290287494659424, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -176.8221435546875, "logps_train/policy_1_l": -193.87112426757812, "logps_train/policy_1_w": -137.19796752929688, "logps_train/policy_2_2": -128.884521484375, "logps_train/policy_2_w": -184.13702392578125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.7892706394195557, "rewards_train/1-l": -2.13691782951355, "rewards_train/1-w": 2.948269844055176, "rewards_train/2-2": 3.24904727935791, "rewards_train/2-w": 1.2474305629730225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.085187673568726, "rewards_train/margins_1": 2.15899920463562, "rewards_train/margins_2": 2.0016167163848877, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -117.34265899658203, "logps_train/policy_1_l": -114.11679077148438, "logps_train/policy_1_w": -74.44499206542969, "logps_train/policy_2_2": -89.1947250366211, "logps_train/policy_2_w": -110.50099182128906, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.9492147564888, "rewards_train/1-l": -2.276522636413574, "rewards_train/1-w": 2.677375555038452, "rewards_train/2-2": 2.14315128326416, "rewards_train/2-w": 1.4936503171920776, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.953898191452026, "rewards_train/margins_1": 1.728160798549652, "rewards_train/margins_2": 0.6495009660720825, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -120.29100036621094, "logps_train/policy_1_l": -238.77581787109375, "logps_train/policy_1_w": -108.75776672363281, "logps_train/policy_2_2": -99.24375915527344, "logps_train/policy_2_w": -135.15040588378906, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8138689994812012, "rewards_train/1-l": -3.4758238792419434, "rewards_train/1-w": 2.614262819290161, "rewards_train/2-2": 2.5412497520446777, "rewards_train/2-w": 1.8458974361419678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.0900866985321045, "rewards_train/margins_1": 0.80039381980896, "rewards_train/margins_2": 0.69535231590271, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -160.64923095703125, "logps_train/policy_1_l": -212.07452392578125, "logps_train/policy_1_w": -138.2025146484375, "logps_train/policy_2_2": -124.703857421875, "logps_train/policy_2_w": -183.33950805664062, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.3960134983062744, "rewards_train/1-l": -2.5532517433166504, "rewards_train/1-w": 3.566466808319092, "rewards_train/2-2": 3.335864543914795, "rewards_train/2-w": 1.7347990274429321, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.119718551635742, "rewards_train/margins_1": 2.1704533100128174, "rewards_train/margins_2": 1.6010655164718628, "step": 552 }, { "epoch": 1.66, "logps_train/policy_1_2": -136.53025817871094, "logps_train/policy_1_l": -68.06405639648438, "logps_train/policy_1_w": -32.220706939697266, "logps_train/policy_2_2": -100.90125274658203, "logps_train/policy_2_w": -55.49798583984375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -48.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -64.0, "rewards_train/1-2": 0.30381035804748535, "rewards_train/1-l": -1.545663595199585, "rewards_train/1-w": 1.5724607706069946, "rewards_train/2-2": 2.035266160964966, "rewards_train/2-w": 0.884576141834259, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.1181243658065796, "rewards_train/margins_1": 1.2686504125595093, "rewards_train/margins_2": 1.1506900191307068, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -168.1842041015625, "logps_train/policy_1_l": -203.9766845703125, "logps_train/policy_1_w": -109.461669921875, "logps_train/policy_2_2": -125.47564697265625, "logps_train/policy_2_w": -161.17193603515625, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 2.4878294467926025, "rewards_train/1-l": -2.3480589389801025, "rewards_train/1-w": 3.0026612281799316, "rewards_train/2-2": 3.903998374938965, "rewards_train/2-w": 1.4437445402145386, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.350720167160034, "rewards_train/margins_1": 0.5148317813873291, "rewards_train/margins_2": 2.4602538347244263, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -151.65008544921875, "logps_train/policy_1_l": -155.79946899414062, "logps_train/policy_1_w": -123.47642517089844, "logps_train/policy_2_2": -116.15569305419922, "logps_train/policy_2_w": -166.28720092773438, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.574834942817688, "rewards_train/1-l": -2.467641592025757, "rewards_train/1-w": 3.6039199829101562, "rewards_train/2-2": 2.9176340103149414, "rewards_train/2-w": 1.7322180271148682, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.071561574935913, "rewards_train/margins_1": 2.0290850400924683, "rewards_train/margins_2": 1.1854159832000732, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -135.30990600585938, "logps_train/policy_1_l": -124.95637512207031, "logps_train/policy_1_w": -127.68600463867188, "logps_train/policy_2_2": -101.58895111083984, "logps_train/policy_2_w": -175.04751586914062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.80533766746521, "rewards_train/1-l": -1.670637607574463, "rewards_train/1-w": 3.38608717918396, "rewards_train/2-2": 2.4012608528137207, "rewards_train/2-w": 1.3530614376068115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.056724786758423, "rewards_train/margins_1": 2.58074951171875, "rewards_train/margins_2": 1.0481994152069092, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -194.91262817382812, "logps_train/policy_1_l": -208.98101806640625, "logps_train/policy_1_w": -136.87527465820312, "logps_train/policy_2_2": -158.59561157226562, "logps_train/policy_2_w": -167.1494140625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.1528773307800293, "rewards_train/1-l": -3.738727569580078, "rewards_train/1-w": 3.034738302230835, "rewards_train/2-2": 3.887216091156006, "rewards_train/2-w": 1.8662112951278687, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.773465871810913, "rewards_train/margins_1": 0.8818609714508057, "rewards_train/margins_2": 2.021004796028137, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -156.01324462890625, "logps_train/policy_1_l": -211.60574340820312, "logps_train/policy_1_w": -139.20343017578125, "logps_train/policy_2_2": -117.10432434082031, "logps_train/policy_2_w": -183.5430908203125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.9500417709350586, "rewards_train/1-l": -2.6006133556365967, "rewards_train/1-w": 3.695281505584717, "rewards_train/2-2": 2.9744796752929688, "rewards_train/2-w": 1.8113166093826294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.2958948612213135, "rewards_train/margins_1": 1.7452397346496582, "rewards_train/margins_2": 1.1631630659103394, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -106.360107421875, "logps_train/policy_1_l": -118.16886901855469, "logps_train/policy_1_w": -108.90869140625, "logps_train/policy_2_2": -77.82585144042969, "logps_train/policy_2_w": -147.591552734375, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.5593013763427734, "rewards_train/1-l": -2.083245277404785, "rewards_train/1-w": 2.8446779251098633, "rewards_train/2-2": 2.487727165222168, "rewards_train/2-w": 1.0252196788787842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.927923202514648, "rewards_train/margins_1": 1.2853765487670898, "rewards_train/margins_2": 1.4625074863433838, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -183.69046020507812, "logps_train/policy_1_l": -127.60588836669922, "logps_train/policy_1_w": -129.49658203125, "logps_train/policy_2_2": -144.6035614013672, "logps_train/policy_2_w": -160.48086547851562, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.2606428861618042, "rewards_train/1-l": -1.6033620834350586, "rewards_train/1-w": 3.0558111667633057, "rewards_train/2-2": 2.8771443367004395, "rewards_train/2-w": 1.7612884044647217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.659173250198364, "rewards_train/margins_1": 1.7951682806015015, "rewards_train/margins_2": 1.1158559322357178, "step": 553 }, { "epoch": 1.66, "learning_rate": 3.992973370223896e-07, "loss": 0.4027, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -81.99034118652344, "logps_train/policy_1_l": -71.86660766601562, "logps_train/policy_1_w": -37.07209396362305, "logps_train/policy_2_2": -61.138370513916016, "logps_train/policy_2_w": -56.30280303955078, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -52.0, "logps_train/ref_1_w": -54.5, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -65.5, "rewards_train/1-2": 0.5457408428192139, "rewards_train/1-l": -1.9896883964538574, "rewards_train/1-w": 1.7474780082702637, "rewards_train/2-2": 1.6720390319824219, "rewards_train/2-w": 0.9179621338844299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.737166404724121, "rewards_train/margins_1": 1.2017371654510498, "rewards_train/margins_2": 0.7540768980979919, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -193.670654296875, "logps_train/policy_1_l": -156.611083984375, "logps_train/policy_1_w": -117.20747375488281, "logps_train/policy_2_2": -148.21133422851562, "logps_train/policy_2_w": -162.96621704101562, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 0.886058509349823, "rewards_train/1-l": -2.2638421058654785, "rewards_train/1-w": 3.3230018615722656, "rewards_train/2-2": 2.6868739128112793, "rewards_train/2-w": 1.390879511833191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.586843967437744, "rewards_train/margins_1": 2.4369433522224426, "rewards_train/margins_2": 1.2959944009780884, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -141.81463623046875, "logps_train/policy_1_l": -183.51443481445312, "logps_train/policy_1_w": -69.11306762695312, "logps_train/policy_2_2": -116.3948745727539, "logps_train/policy_2_w": -98.54194641113281, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.31697416305542, "rewards_train/1-l": -2.536989212036133, "rewards_train/1-w": 2.980490207672119, "rewards_train/2-2": 2.2765278816223145, "rewards_train/2-w": 2.1426806449890137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.517479419708252, "rewards_train/margins_1": 1.6635160446166992, "rewards_train/margins_2": 0.13384723663330078, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -145.13829040527344, "logps_train/policy_1_l": -152.0101318359375, "logps_train/policy_1_w": -94.29075622558594, "logps_train/policy_2_2": -113.36337280273438, "logps_train/policy_2_w": -136.08294677734375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.5619523525238037, "rewards_train/1-l": -2.273475170135498, "rewards_train/1-w": 3.199049472808838, "rewards_train/2-2": 2.958975076675415, "rewards_train/2-w": 1.3245183229446411, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.472524642944336, "rewards_train/margins_1": 1.6370971202850342, "rewards_train/margins_2": 1.634456753730774, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -154.97677612304688, "logps_train/policy_1_l": -159.943359375, "logps_train/policy_1_w": -150.35528564453125, "logps_train/policy_2_2": -125.70526885986328, "logps_train/policy_2_w": -189.30999755859375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.780447006225586, "rewards_train/1-l": -1.593945026397705, "rewards_train/1-w": 3.698065757751465, "rewards_train/2-2": 3.1025197505950928, "rewards_train/2-w": 1.8713449239730835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.29201078414917, "rewards_train/margins_1": 1.917618751525879, "rewards_train/margins_2": 1.2311748266220093, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -127.0728988647461, "logps_train/policy_1_l": -87.84098815917969, "logps_train/policy_1_w": -93.68909454345703, "logps_train/policy_2_2": -86.67361450195312, "logps_train/policy_2_w": -135.5359344482422, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.9052107930183411, "rewards_train/1-l": -2.035661220550537, "rewards_train/1-w": 2.9623398780822754, "rewards_train/2-2": 2.590451240539551, "rewards_train/2-w": 0.9839063882827759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.9980010986328125, "rewards_train/margins_1": 2.0571290850639343, "rewards_train/margins_2": 1.606544852256775, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -161.41531372070312, "logps_train/policy_1_l": -177.8345947265625, "logps_train/policy_1_w": -131.73123168945312, "logps_train/policy_2_2": -131.12887573242188, "logps_train/policy_2_w": -165.97439575195312, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.7678446769714355, "rewards_train/1-l": -1.8483033180236816, "rewards_train/1-w": 3.537814140319824, "rewards_train/2-2": 4.008987903594971, "rewards_train/2-w": 2.318185806274414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.386117458343506, "rewards_train/margins_1": 0.7699694633483887, "rewards_train/margins_2": 1.6908020973205566, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -140.6591796875, "logps_train/policy_1_l": -213.27587890625, "logps_train/policy_1_w": -179.560302734375, "logps_train/policy_2_2": -104.00296020507812, "logps_train/policy_2_w": -238.019287109375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.5622071027755737, "rewards_train/1-l": -2.738914966583252, "rewards_train/1-w": 3.0920164585113525, "rewards_train/2-2": 2.687204122543335, "rewards_train/2-w": 0.5808833837509155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8309314250946045, "rewards_train/margins_1": 1.5298093557357788, "rewards_train/margins_2": 2.1063207387924194, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -116.16310119628906, "logps_train/policy_1_l": -164.33401489257812, "logps_train/policy_1_w": -128.44613647460938, "logps_train/policy_2_2": -88.109130859375, "logps_train/policy_2_w": -191.2839813232422, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.0402331352233887, "rewards_train/1-l": -1.609865665435791, "rewards_train/1-w": 2.9772613048553467, "rewards_train/2-2": 2.114184856414795, "rewards_train/2-w": 1.0247256755828857, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.587126970291138, "rewards_train/margins_1": 1.937028169631958, "rewards_train/margins_2": 1.0894591808319092, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -163.35153198242188, "logps_train/policy_1_l": -168.75881958007812, "logps_train/policy_1_w": -100.62022399902344, "logps_train/policy_2_2": -131.23963928222656, "logps_train/policy_2_w": -126.90682983398438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.8304730653762817, "rewards_train/1-l": -2.23633074760437, "rewards_train/1-w": 2.6965718269348145, "rewards_train/2-2": 2.740098476409912, "rewards_train/2-w": 1.6335358619689941, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.932902574539185, "rewards_train/margins_1": 0.8660987615585327, "rewards_train/margins_2": 1.106562614440918, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -160.97113037109375, "logps_train/policy_1_l": -193.58029174804688, "logps_train/policy_1_w": -120.71177673339844, "logps_train/policy_2_2": -125.47285461425781, "logps_train/policy_2_w": -159.96978759765625, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.474761724472046, "rewards_train/1-l": -1.1576387882232666, "rewards_train/1-w": 2.7171032428741455, "rewards_train/2-2": 3.003495931625366, "rewards_train/2-w": 1.571771502494812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.874742031097412, "rewards_train/margins_1": 1.2423415184020996, "rewards_train/margins_2": 1.4317244291305542, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -243.11795043945312, "logps_train/policy_1_l": -209.63815307617188, "logps_train/policy_1_w": -181.92108154296875, "logps_train/policy_2_2": -212.09100341796875, "logps_train/policy_2_w": -219.54159545898438, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 2.175705909729004, "rewards_train/1-l": -2.1950645446777344, "rewards_train/1-w": 3.732891798019409, "rewards_train/2-2": 3.4346494674682617, "rewards_train/2-w": 2.192715883255005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.9279563426971436, "rewards_train/margins_1": 1.5571858882904053, "rewards_train/margins_2": 1.2419335842132568, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -151.6055145263672, "logps_train/policy_1_l": -83.8868408203125, "logps_train/policy_1_w": -93.72904968261719, "logps_train/policy_2_2": -111.4805679321289, "logps_train/policy_2_w": -122.65339660644531, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.669136106967926, "rewards_train/1-l": -1.3945432901382446, "rewards_train/1-w": 2.160689115524292, "rewards_train/2-2": 2.5808489322662354, "rewards_train/2-w": 0.8370035886764526, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5552324056625366, "rewards_train/margins_1": 1.491553008556366, "rewards_train/margins_2": 1.7438453435897827, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -111.55007934570312, "logps_train/policy_1_l": -104.50971984863281, "logps_train/policy_1_w": -101.18843841552734, "logps_train/policy_2_2": -85.92048645019531, "logps_train/policy_2_w": -124.67394256591797, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.5145237445831299, "rewards_train/1-l": -1.6103477478027344, "rewards_train/1-w": 2.7803750038146973, "rewards_train/2-2": 2.4157636165618896, "rewards_train/2-w": 1.7958873510360718, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.390722751617432, "rewards_train/margins_1": 1.2658512592315674, "rewards_train/margins_2": 0.6198762655258179, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -120.99224090576172, "logps_train/policy_1_l": -144.84507751464844, "logps_train/policy_1_w": -81.33670043945312, "logps_train/policy_2_2": -92.92240905761719, "logps_train/policy_2_w": -116.60504913330078, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.3363224267959595, "rewards_train/1-l": -2.284409761428833, "rewards_train/1-w": 2.7874233722686768, "rewards_train/2-2": 2.485396385192871, "rewards_train/2-w": 1.3707449436187744, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.07183313369751, "rewards_train/margins_1": 1.4511009454727173, "rewards_train/margins_2": 1.1146514415740967, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -203.8832550048828, "logps_train/policy_1_l": -269.5711975097656, "logps_train/policy_1_w": -137.56561279296875, "logps_train/policy_2_2": -155.45257568359375, "logps_train/policy_2_w": -200.14263916015625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.2304245233535767, "rewards_train/1-l": -4.375869274139404, "rewards_train/1-w": 4.38093900680542, "rewards_train/2-2": 3.5109920501708984, "rewards_train/2-w": 2.160735607147217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.756808280944824, "rewards_train/margins_1": 3.1505144834518433, "rewards_train/margins_2": 1.3502564430236816, "step": 555 }, { "epoch": 1.66, "learning_rate": 3.860099912454346e-07, "loss": 0.4065, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -127.21873474121094, "logps_train/policy_1_l": -114.32508850097656, "logps_train/policy_1_w": -72.81820678710938, "logps_train/policy_2_2": -96.79779052734375, "logps_train/policy_2_w": -102.20651245117188, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 0.6847673654556274, "rewards_train/1-l": -1.9532122611999512, "rewards_train/1-w": 2.6189608573913574, "rewards_train/2-2": 2.1626036167144775, "rewards_train/2-w": 1.6066932678222656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.572173118591309, "rewards_train/margins_1": 1.93419349193573, "rewards_train/margins_2": 0.5559103488922119, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -80.48218536376953, "logps_train/policy_1_l": -125.69278717041016, "logps_train/policy_1_w": -92.19202423095703, "logps_train/policy_2_2": -57.34836196899414, "logps_train/policy_2_w": -129.92227172851562, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.7064688801765442, "rewards_train/1-l": -1.8580845594406128, "rewards_train/1-w": 2.22542667388916, "rewards_train/2-2": 1.6540307998657227, "rewards_train/2-w": 0.3675381541252136, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.083511233329773, "rewards_train/margins_1": 1.518957793712616, "rewards_train/margins_2": 1.286492645740509, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -135.35836791992188, "logps_train/policy_1_l": -194.5657958984375, "logps_train/policy_1_w": -132.97210693359375, "logps_train/policy_2_2": -106.8887939453125, "logps_train/policy_2_w": -172.47036743164062, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.0860378742218018, "rewards_train/1-l": -2.285290479660034, "rewards_train/1-w": 3.244781494140625, "rewards_train/2-2": 2.63211727142334, "rewards_train/2-w": 1.5971028804779053, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.530071973800659, "rewards_train/margins_1": 1.1587436199188232, "rewards_train/margins_2": 1.0350143909454346, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -133.7374267578125, "logps_train/policy_1_l": -106.82659912109375, "logps_train/policy_1_w": -93.6095962524414, "logps_train/policy_2_2": -84.9490966796875, "logps_train/policy_2_w": -134.9406280517578, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.4840695858001709, "rewards_train/1-l": -1.94203519821167, "rewards_train/1-w": 2.6148221492767334, "rewards_train/2-2": 2.354308605194092, "rewards_train/2-w": 1.1215624809265137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.556857347488403, "rewards_train/margins_1": 2.1307525634765625, "rewards_train/margins_2": 1.2327461242675781, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -137.83724975585938, "logps_train/policy_1_l": -135.95956420898438, "logps_train/policy_1_w": -111.70367431640625, "logps_train/policy_2_2": -100.39913177490234, "logps_train/policy_2_w": -157.654296875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 0.7444000244140625, "rewards_train/1-l": -1.7103116512298584, "rewards_train/1-w": 2.7351012229919434, "rewards_train/2-2": 2.4374303817749023, "rewards_train/2-w": 0.7306636571884155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.445412874221802, "rewards_train/margins_1": 1.9907011985778809, "rewards_train/margins_2": 1.7067667245864868, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -161.57278442382812, "logps_train/policy_1_l": -162.7090301513672, "logps_train/policy_1_w": -96.44557189941406, "logps_train/policy_2_2": -125.37236022949219, "logps_train/policy_2_w": -122.664306640625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 1.908346176147461, "rewards_train/1-l": -2.3736367225646973, "rewards_train/1-w": 3.4312243461608887, "rewards_train/2-2": 3.253389358520508, "rewards_train/2-w": 2.0429446697235107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.804861068725586, "rewards_train/margins_1": 1.5228781700134277, "rewards_train/margins_2": 1.210444688796997, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -111.72087097167969, "logps_train/policy_1_l": -133.42776489257812, "logps_train/policy_1_w": -108.26871490478516, "logps_train/policy_2_2": -91.57112121582031, "logps_train/policy_2_w": -134.11648559570312, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.5576002597808838, "rewards_train/1-l": -2.168948173522949, "rewards_train/1-w": 2.649690628051758, "rewards_train/2-2": 2.4475760459899902, "rewards_train/2-w": 1.3988993167877197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.818638801574707, "rewards_train/margins_1": 1.092090368270874, "rewards_train/margins_2": 1.0486767292022705, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -134.53952026367188, "logps_train/policy_1_l": -166.1982421875, "logps_train/policy_1_w": -109.05064392089844, "logps_train/policy_2_2": -115.09685516357422, "logps_train/policy_2_w": -129.8883056640625, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.7546420097351074, "rewards_train/1-l": -2.164379596710205, "rewards_train/1-w": 2.8832168579101562, "rewards_train/2-2": 2.479376792907715, "rewards_train/2-w": 2.1377310752868652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.047596454620361, "rewards_train/margins_1": 1.1285748481750488, "rewards_train/margins_2": 0.3416457176208496, "step": 556 }, { "epoch": 1.67, "logps_train/policy_1_2": -147.89190673828125, "logps_train/policy_1_l": -109.10700225830078, "logps_train/policy_1_w": -94.47730255126953, "logps_train/policy_2_2": -107.55355834960938, "logps_train/policy_2_w": -120.91411590576172, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.4303412437438965, "rewards_train/1-l": -1.874763011932373, "rewards_train/1-w": 3.051098108291626, "rewards_train/2-2": 3.3173000812530518, "rewards_train/2-w": 1.9984327554702759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.925861120223999, "rewards_train/margins_1": 1.6207568645477295, "rewards_train/margins_2": 1.3188673257827759, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -128.87539672851562, "logps_train/policy_1_l": -162.54327392578125, "logps_train/policy_1_w": -80.75059509277344, "logps_train/policy_2_2": -93.51585388183594, "logps_train/policy_2_w": -100.24498748779297, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.2655842304229736, "rewards_train/1-l": -3.4875292778015137, "rewards_train/1-w": 2.799940586090088, "rewards_train/2-2": 2.9960713386535645, "rewards_train/2-w": 2.0270636081695557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.287469863891602, "rewards_train/margins_1": 1.5343563556671143, "rewards_train/margins_2": 0.9690077304840088, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -132.6263885498047, "logps_train/policy_1_l": -107.62869262695312, "logps_train/policy_1_w": -81.70516967773438, "logps_train/policy_2_2": -101.4561767578125, "logps_train/policy_2_w": -108.88069915771484, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.4967358112335205, "rewards_train/1-l": -2.002713441848755, "rewards_train/1-w": 2.6107335090637207, "rewards_train/2-2": 2.996570110321045, "rewards_train/2-w": 1.8416180610656738, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.613446950912476, "rewards_train/margins_1": 1.1139976978302002, "rewards_train/margins_2": 1.154952049255371, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -109.71990203857422, "logps_train/policy_1_l": -159.9140167236328, "logps_train/policy_1_w": -88.9442138671875, "logps_train/policy_2_2": -79.81988525390625, "logps_train/policy_2_w": -111.86695861816406, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.2389471530914307, "rewards_train/1-l": -2.0192344188690186, "rewards_train/1-w": 2.332923173904419, "rewards_train/2-2": 2.3242621421813965, "rewards_train/2-w": 1.2980693578720093, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.3521575927734375, "rewards_train/margins_1": 1.0939760208129883, "rewards_train/margins_2": 1.0261927843093872, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -122.69403839111328, "logps_train/policy_1_l": -176.86398315429688, "logps_train/policy_1_w": -135.5908203125, "logps_train/policy_2_2": -97.35140991210938, "logps_train/policy_2_w": -173.65481567382812, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8618463277816772, "rewards_train/1-l": -1.8065143823623657, "rewards_train/1-w": 3.45654296875, "rewards_train/2-2": 2.834390163421631, "rewards_train/2-w": 1.7313934564590454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.263057351112366, "rewards_train/margins_1": 1.5946966409683228, "rewards_train/margins_2": 1.1029967069625854, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -168.58541870117188, "logps_train/policy_1_l": -207.15821838378906, "logps_train/policy_1_w": -147.291259765625, "logps_train/policy_2_2": -131.47964477539062, "logps_train/policy_2_w": -193.93731689453125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.168020248413086, "rewards_train/1-l": -2.4345717430114746, "rewards_train/1-w": 3.6286869049072266, "rewards_train/2-2": 3.5293798446655273, "rewards_train/2-w": 1.9738458395004272, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.063258647918701, "rewards_train/margins_1": 1.4606666564941406, "rewards_train/margins_2": 1.5555340051651, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -119.24136352539062, "logps_train/policy_1_l": -162.5220489501953, "logps_train/policy_1_w": -115.89129638671875, "logps_train/policy_2_2": -81.35173034667969, "logps_train/policy_2_w": -161.27749633789062, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.1024255752563477, "rewards_train/1-l": -1.838143229484558, "rewards_train/1-w": 2.7885069847106934, "rewards_train/2-2": 2.592951774597168, "rewards_train/2-w": 1.3948092460632324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6266502141952515, "rewards_train/margins_1": 1.6860814094543457, "rewards_train/margins_2": 1.1981425285339355, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -171.6829833984375, "logps_train/policy_1_l": -194.0953369140625, "logps_train/policy_1_w": -151.36705017089844, "logps_train/policy_2_2": -141.40475463867188, "logps_train/policy_2_w": -180.859619140625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.9988884925842285, "rewards_train/1-l": -1.7415634393692017, "rewards_train/1-w": 3.3632946014404297, "rewards_train/2-2": 3.1415555477142334, "rewards_train/2-w": 1.9554436206817627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.104858040809631, "rewards_train/margins_1": 1.3644061088562012, "rewards_train/margins_2": 1.1861119270324707, "step": 557 }, { "epoch": 1.67, "learning_rate": 3.729289671665998e-07, "loss": 0.3851, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -257.4533996582031, "logps_train/policy_1_l": -171.35916137695312, "logps_train/policy_1_w": -179.3060302734375, "logps_train/policy_2_2": -210.45648193359375, "logps_train/policy_2_w": -248.14248657226562, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 2.2140355110168457, "rewards_train/1-l": -2.0359158515930176, "rewards_train/1-w": 3.955333709716797, "rewards_train/2-2": 4.529351234436035, "rewards_train/2-w": 1.0701271295547485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.9912495613098145, "rewards_train/margins_1": 1.7412981986999512, "rewards_train/margins_2": 3.4592241048812866, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -129.1991729736328, "logps_train/policy_1_l": -175.16465759277344, "logps_train/policy_1_w": -102.78192138671875, "logps_train/policy_2_2": -94.42915344238281, "logps_train/policy_2_w": -151.38360595703125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.1105509996414185, "rewards_train/1-l": -2.3842391967773438, "rewards_train/1-w": 3.2343077659606934, "rewards_train/2-2": 2.526615858078003, "rewards_train/2-w": 1.2819514274597168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.618546962738037, "rewards_train/margins_1": 2.123756766319275, "rewards_train/margins_2": 1.2446644306182861, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -116.94548034667969, "logps_train/policy_1_l": -145.81674194335938, "logps_train/policy_1_w": -124.70416259765625, "logps_train/policy_2_2": -85.030517578125, "logps_train/policy_2_w": -179.48797607421875, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.9773268103599548, "rewards_train/1-l": -1.1578468084335327, "rewards_train/1-w": 2.9811465740203857, "rewards_train/2-2": 2.0039801597595215, "rewards_train/2-w": 0.8824511170387268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.1389933824539185, "rewards_train/margins_1": 2.003819763660431, "rewards_train/margins_2": 1.1215290427207947, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -200.47959899902344, "logps_train/policy_1_l": -235.09490966796875, "logps_train/policy_1_w": -96.68850708007812, "logps_train/policy_2_2": -159.90086364746094, "logps_train/policy_2_w": -130.38217163085938, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.7832894325256348, "rewards_train/1-l": -4.112615585327148, "rewards_train/1-w": 2.8381800651550293, "rewards_train/2-2": 3.4833507537841797, "rewards_train/2-w": 1.4438148736953735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.950795650482178, "rewards_train/margins_1": 1.0548906326293945, "rewards_train/margins_2": 2.039535880088806, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -123.662109375, "logps_train/policy_1_l": -152.6943359375, "logps_train/policy_1_w": -63.87653732299805, "logps_train/policy_2_2": -89.3841552734375, "logps_train/policy_2_w": -96.84933471679688, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 1.2962887287139893, "rewards_train/1-l": -2.768847703933716, "rewards_train/1-w": 2.4302663803100586, "rewards_train/2-2": 2.8803348541259766, "rewards_train/2-w": 1.5524693727493286, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.199114084243774, "rewards_train/margins_1": 1.1339776515960693, "rewards_train/margins_2": 1.327865481376648, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -195.54345703125, "logps_train/policy_1_l": -131.25845336914062, "logps_train/policy_1_w": -115.36979675292969, "logps_train/policy_2_2": -144.9527130126953, "logps_train/policy_2_w": -151.853271484375, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.2144036293029785, "rewards_train/1-l": -2.4761881828308105, "rewards_train/1-w": 3.4016919136047363, "rewards_train/2-2": 3.826603412628174, "rewards_train/2-w": 1.8185789585113525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.877880096435547, "rewards_train/margins_1": 2.187288284301758, "rewards_train/margins_2": 2.0080244541168213, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -158.2181854248047, "logps_train/policy_1_l": -144.89346313476562, "logps_train/policy_1_w": -103.26276397705078, "logps_train/policy_2_2": -132.29550170898438, "logps_train/policy_2_w": -125.54519653320312, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 2.2656807899475098, "rewards_train/1-l": -2.6452064514160156, "rewards_train/1-w": 2.6627864837646484, "rewards_train/2-2": 3.4485747814178467, "rewards_train/2-w": 1.726730227470398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.307992935180664, "rewards_train/margins_1": 0.39710569381713867, "rewards_train/margins_2": 1.7218445539474487, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -178.446533203125, "logps_train/policy_1_l": -121.77903747558594, "logps_train/policy_1_w": -144.12689208984375, "logps_train/policy_2_2": -145.42092895507812, "logps_train/policy_2_w": -198.73226928710938, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.1819090843200684, "rewards_train/1-l": -1.5079822540283203, "rewards_train/1-w": 3.8673887252807617, "rewards_train/2-2": 2.4731407165527344, "rewards_train/2-w": 1.7970855236053467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.375370979309082, "rewards_train/margins_1": 2.6854796409606934, "rewards_train/margins_2": 0.6760551929473877, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -181.61537170410156, "logps_train/policy_1_l": -261.53814697265625, "logps_train/policy_1_w": -140.01583862304688, "logps_train/policy_2_2": -148.54415893554688, "logps_train/policy_2_w": -180.8256072998047, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 2.050962448120117, "rewards_train/1-l": -2.480376720428467, "rewards_train/1-w": 3.1800572872161865, "rewards_train/2-2": 3.4487099647521973, "rewards_train/2-w": 1.714313268661499, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.660434007644653, "rewards_train/margins_1": 1.1290948390960693, "rewards_train/margins_2": 1.7343966960906982, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -90.30859375, "logps_train/policy_1_l": -158.91477966308594, "logps_train/policy_1_w": -98.69800567626953, "logps_train/policy_2_2": -65.34638214111328, "logps_train/policy_2_w": -122.96697998046875, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -86.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.551952600479126, "rewards_train/1-l": -2.236131429672241, "rewards_train/1-w": 2.285668134689331, "rewards_train/2-2": 2.096611976623535, "rewards_train/2-w": 1.1079893112182617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.521799564361572, "rewards_train/margins_1": 0.7337155342102051, "rewards_train/margins_2": 0.9886226654052734, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -96.99373626708984, "logps_train/policy_1_l": -174.59300231933594, "logps_train/policy_1_w": -116.42994689941406, "logps_train/policy_2_2": -72.27084350585938, "logps_train/policy_2_w": -152.9202880859375, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.489688754081726, "rewards_train/1-l": -2.119065761566162, "rewards_train/1-w": 3.083177328109741, "rewards_train/2-2": 2.1408843994140625, "rewards_train/2-w": 1.156212329864502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.202243089675903, "rewards_train/margins_1": 1.5934885740280151, "rewards_train/margins_2": 0.9846720695495605, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -174.34471130371094, "logps_train/policy_1_l": -212.0654296875, "logps_train/policy_1_w": -110.77037048339844, "logps_train/policy_2_2": -132.30661010742188, "logps_train/policy_2_w": -155.6316680908203, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.6061535477638245, "rewards_train/1-l": -4.352832794189453, "rewards_train/1-w": 3.242494583129883, "rewards_train/2-2": 2.788869857788086, "rewards_train/2-w": 0.42120862007141113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.595327377319336, "rewards_train/margins_1": 2.6363410353660583, "rewards_train/margins_2": 2.367661237716675, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -151.96951293945312, "logps_train/policy_1_l": -138.93995666503906, "logps_train/policy_1_w": -111.68439483642578, "logps_train/policy_2_2": -121.6231918334961, "logps_train/policy_2_w": -134.74220275878906, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.5071510076522827, "rewards_train/1-l": -1.9234886169433594, "rewards_train/1-w": 2.927263021469116, "rewards_train/2-2": 2.4053564071655273, "rewards_train/2-w": 1.8609364032745361, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.850751638412476, "rewards_train/margins_1": 1.4201120138168335, "rewards_train/margins_2": 0.5444200038909912, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -198.57191467285156, "logps_train/policy_1_l": -184.90972900390625, "logps_train/policy_1_w": -112.16265869140625, "logps_train/policy_2_2": -153.96334838867188, "logps_train/policy_2_w": -157.70822143554688, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.564683198928833, "rewards_train/1-l": -2.2159719467163086, "rewards_train/1-w": 3.8681087493896484, "rewards_train/2-2": 3.805227279663086, "rewards_train/2-w": 2.085428237915039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.084080696105957, "rewards_train/margins_1": 2.3034255504608154, "rewards_train/margins_2": 1.7197990417480469, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -149.5679931640625, "logps_train/policy_1_l": -280.99786376953125, "logps_train/policy_1_w": -167.21817016601562, "logps_train/policy_2_2": -112.1115951538086, "logps_train/policy_2_w": -226.7581787109375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.2217161655426025, "rewards_train/1-l": -2.090900421142578, "rewards_train/1-w": 2.8214447498321533, "rewards_train/2-2": 3.247727394104004, "rewards_train/2-w": 0.6864871978759766, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9123451709747314, "rewards_train/margins_1": 0.5997285842895508, "rewards_train/margins_2": 2.5612401962280273, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -148.28695678710938, "logps_train/policy_1_l": -166.23101806640625, "logps_train/policy_1_w": -133.0941162109375, "logps_train/policy_2_2": -106.57225799560547, "logps_train/policy_2_w": -181.68505859375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.2275549173355103, "rewards_train/1-l": -2.194488763809204, "rewards_train/1-w": 3.009338855743408, "rewards_train/2-2": 2.9052743911743164, "rewards_train/2-w": 1.0533695220947266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.203827619552612, "rewards_train/margins_1": 1.781783938407898, "rewards_train/margins_2": 1.8519048690795898, "step": 559 }, { "epoch": 1.68, "learning_rate": 3.6005554147077406e-07, "loss": 0.3757, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -216.42437744140625, "logps_train/policy_1_l": -322.1888732910156, "logps_train/policy_1_w": -132.9065704345703, "logps_train/policy_2_2": -175.9689178466797, "logps_train/policy_2_w": -173.10067749023438, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -280.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.7356879711151123, "rewards_train/1-l": -4.311074256896973, "rewards_train/1-w": 3.796844005584717, "rewards_train/2-2": 3.4984207153320312, "rewards_train/2-w": 2.2711825370788574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 8.10791826248169, "rewards_train/margins_1": 2.0611560344696045, "rewards_train/margins_2": 1.2272381782531738, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -180.6978302001953, "logps_train/policy_1_l": -150.12396240234375, "logps_train/policy_1_w": -110.45248413085938, "logps_train/policy_2_2": -144.0093994140625, "logps_train/policy_2_w": -144.5167694091797, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.357560157775879, "rewards_train/1-l": -2.204779624938965, "rewards_train/1-w": 2.715298652648926, "rewards_train/2-2": 2.677185535430908, "rewards_train/2-w": 1.5826988220214844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.920078277587891, "rewards_train/margins_1": 1.3577384948730469, "rewards_train/margins_2": 1.0944867134094238, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -106.93240356445312, "logps_train/policy_1_l": -133.7752685546875, "logps_train/policy_1_w": -51.03581237792969, "logps_train/policy_2_2": -86.59197998046875, "logps_train/policy_2_w": -65.02906036376953, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -73.5, "rewards_train/1-2": 0.7567592859268188, "rewards_train/1-l": -0.9302616119384766, "rewards_train/1-w": 1.5034500360488892, "rewards_train/2-2": 1.7736139297485352, "rewards_train/2-w": 0.8201403617858887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4337116479873657, "rewards_train/margins_1": 0.7466907501220703, "rewards_train/margins_2": 0.9534735679626465, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -203.99671936035156, "logps_train/policy_1_l": -213.19549560546875, "logps_train/policy_1_w": -139.7941131591797, "logps_train/policy_2_2": -157.33604431152344, "logps_train/policy_2_w": -196.33718872070312, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 1.2284531593322754, "rewards_train/1-l": -2.4461123943328857, "rewards_train/1-w": 4.4346513748168945, "rewards_train/2-2": 3.0538957118988037, "rewards_train/2-w": 2.4912829399108887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.88076376914978, "rewards_train/margins_1": 3.206198215484619, "rewards_train/margins_2": 0.562612771987915, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -136.22341918945312, "logps_train/policy_1_l": -229.49560546875, "logps_train/policy_1_w": -130.5037078857422, "logps_train/policy_2_2": -93.30133056640625, "logps_train/policy_2_w": -163.1842041015625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.393282175064087, "rewards_train/1-l": -3.115185499191284, "rewards_train/1-w": 2.8340041637420654, "rewards_train/2-2": 2.769866943359375, "rewards_train/2-w": 1.4847052097320557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.94918966293335, "rewards_train/margins_1": 1.4407219886779785, "rewards_train/margins_2": 1.2851617336273193, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -187.87326049804688, "logps_train/policy_1_l": -158.51382446289062, "logps_train/policy_1_w": -168.70257568359375, "logps_train/policy_2_2": -141.02255249023438, "logps_train/policy_2_w": -215.69003295898438, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.7095481157302856, "rewards_train/1-l": -2.0771634578704834, "rewards_train/1-w": 3.5969293117523193, "rewards_train/2-2": 3.7243082523345947, "rewards_train/2-w": 1.306779146194458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.674092769622803, "rewards_train/margins_1": 1.8873811960220337, "rewards_train/margins_2": 2.4175291061401367, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -128.01077270507812, "logps_train/policy_1_l": -181.69973754882812, "logps_train/policy_1_w": -90.3790283203125, "logps_train/policy_2_2": -102.40791320800781, "logps_train/policy_2_w": -117.0152587890625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.385545015335083, "rewards_train/1-l": -3.413431406021118, "rewards_train/1-w": 2.3550662994384766, "rewards_train/2-2": 2.5995893478393555, "rewards_train/2-w": 1.1523802280426025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.768497705459595, "rewards_train/margins_1": 0.9695212841033936, "rewards_train/margins_2": 1.447209119796753, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -176.816162109375, "logps_train/policy_1_l": -183.2327117919922, "logps_train/policy_1_w": -145.12277221679688, "logps_train/policy_2_2": -128.79827880859375, "logps_train/policy_2_w": -189.068115234375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.7683849334716797, "rewards_train/1-l": -1.5023736953735352, "rewards_train/1-w": 3.4627223014831543, "rewards_train/2-2": 3.6857967376708984, "rewards_train/2-w": 1.824437141418457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.9650959968566895, "rewards_train/margins_1": 1.6943373680114746, "rewards_train/margins_2": 1.8613595962524414, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -264.60546875, "logps_train/policy_1_l": -255.94015502929688, "logps_train/policy_1_w": -177.16722106933594, "logps_train/policy_2_2": -220.884033203125, "logps_train/policy_2_w": -245.26361083984375, "logps_train/ref_1_2": -284.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -258.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 1.8769543170928955, "rewards_train/1-l": -2.527804136276245, "rewards_train/1-w": 4.306324005126953, "rewards_train/2-2": 3.667847156524658, "rewards_train/2-w": 2.0587968826293945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.834128141403198, "rewards_train/margins_1": 2.4293696880340576, "rewards_train/margins_2": 1.6090502738952637, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -171.07131958007812, "logps_train/policy_1_l": -151.97689819335938, "logps_train/policy_1_w": -115.6313705444336, "logps_train/policy_2_2": -139.84458923339844, "logps_train/policy_2_w": -143.09707641601562, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 2.1741182804107666, "rewards_train/1-l": -1.5961264371871948, "rewards_train/1-w": 3.2853002548217773, "rewards_train/2-2": 3.5436654090881348, "rewards_train/2-w": 2.0809175968170166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.881426692008972, "rewards_train/margins_1": 1.1111819744110107, "rewards_train/margins_2": 1.4627478122711182, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -205.22125244140625, "logps_train/policy_1_l": -151.56959533691406, "logps_train/policy_1_w": -148.53977966308594, "logps_train/policy_2_2": -165.57920837402344, "logps_train/policy_2_w": -178.04440307617188, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.3744580745697021, "rewards_train/1-l": -1.2614519596099854, "rewards_train/1-w": 2.981666326522827, "rewards_train/2-2": 3.1705946922302246, "rewards_train/2-w": 1.9584509134292603, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2431182861328125, "rewards_train/margins_1": 1.607208251953125, "rewards_train/margins_2": 1.2121437788009644, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -140.92727661132812, "logps_train/policy_1_l": -195.3006591796875, "logps_train/policy_1_w": -154.58885192871094, "logps_train/policy_2_2": -104.94570922851562, "logps_train/policy_2_w": -207.17984008789062, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 2.2041468620300293, "rewards_train/1-l": -2.2829952239990234, "rewards_train/1-w": 3.9770522117614746, "rewards_train/2-2": 3.299179792404175, "rewards_train/2-w": 1.8038917779922485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.260047435760498, "rewards_train/margins_1": 1.7729053497314453, "rewards_train/margins_2": 1.4952880144119263, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -209.61090087890625, "logps_train/policy_1_l": -224.90725708007812, "logps_train/policy_1_w": -133.07691955566406, "logps_train/policy_2_2": -162.1630859375, "logps_train/policy_2_w": -178.9973602294922, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2498477697372437, "rewards_train/1-l": -3.132911443710327, "rewards_train/1-w": 3.2329325675964355, "rewards_train/2-2": 3.171973466873169, "rewards_train/2-w": 1.5346397161483765, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.365844011306763, "rewards_train/margins_1": 1.983084797859192, "rewards_train/margins_2": 1.6373337507247925, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -160.20077514648438, "logps_train/policy_1_l": -191.49581909179688, "logps_train/policy_1_w": -109.19340515136719, "logps_train/policy_2_2": -120.87495422363281, "logps_train/policy_2_w": -138.90493774414062, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.8748435974121094, "rewards_train/1-l": -2.230050802230835, "rewards_train/1-w": 3.1691362857818604, "rewards_train/2-2": 2.8043012619018555, "rewards_train/2-w": 2.3868491649627686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.399187088012695, "rewards_train/margins_1": 1.294292688369751, "rewards_train/margins_2": 0.4174520969390869, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -126.11575317382812, "logps_train/policy_1_l": -125.63784790039062, "logps_train/policy_1_w": -122.88436126708984, "logps_train/policy_2_2": -95.68190002441406, "logps_train/policy_2_w": -170.39752197265625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.0759239196777344, "rewards_train/1-l": -1.855972409248352, "rewards_train/1-w": 3.2881267070770264, "rewards_train/2-2": 2.4568097591400146, "rewards_train/2-w": 1.3618104457855225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.144099116325378, "rewards_train/margins_1": 2.212202787399292, "rewards_train/margins_2": 1.0949993133544922, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -132.6780548095703, "logps_train/policy_1_l": -193.3368377685547, "logps_train/policy_1_w": -160.24127197265625, "logps_train/policy_2_2": -100.84135437011719, "logps_train/policy_2_w": -203.060302734375, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.740006685256958, "rewards_train/1-l": -2.5767998695373535, "rewards_train/1-w": 2.336613655090332, "rewards_train/2-2": 2.6646924018859863, "rewards_train/2-w": 0.6244372129440308, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.9134135246276855, "rewards_train/margins_1": 0.596606969833374, "rewards_train/margins_2": 2.0402551889419556, "step": 561 }, { "epoch": 1.68, "learning_rate": 3.4739097058161116e-07, "loss": 0.3829, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -124.06217956542969, "logps_train/policy_1_l": -156.70681762695312, "logps_train/policy_1_w": -87.724609375, "logps_train/policy_2_2": -103.29939270019531, "logps_train/policy_2_w": -115.57954406738281, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.7500321865081787, "rewards_train/1-l": -1.7558872699737549, "rewards_train/1-w": 2.700488567352295, "rewards_train/2-2": 2.4388113021850586, "rewards_train/2-w": 1.8236857652664185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.45637583732605, "rewards_train/margins_1": 0.9504563808441162, "rewards_train/margins_2": 0.6151255369186401, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -176.89439392089844, "logps_train/policy_1_l": -218.3493194580078, "logps_train/policy_1_w": -110.6363525390625, "logps_train/policy_2_2": -141.67637634277344, "logps_train/policy_2_w": -139.9344024658203, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.4402474164962769, "rewards_train/1-l": -3.080829620361328, "rewards_train/1-w": 3.297301769256592, "rewards_train/2-2": 3.1987686157226562, "rewards_train/2-w": 2.306560516357422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.37813138961792, "rewards_train/margins_1": 1.857054352760315, "rewards_train/margins_2": 0.8922080993652344, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -134.47958374023438, "logps_train/policy_1_l": -196.72085571289062, "logps_train/policy_1_w": -91.13324737548828, "logps_train/policy_2_2": -94.86812591552734, "logps_train/policy_2_w": -138.69332885742188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.1274328231811523, "rewards_train/1-l": -2.6181797981262207, "rewards_train/1-w": 3.2644097805023193, "rewards_train/2-2": 2.5915565490722656, "rewards_train/2-w": 1.4853553771972656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.88258957862854, "rewards_train/margins_1": 2.136976957321167, "rewards_train/margins_2": 1.106201171875, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -126.93213653564453, "logps_train/policy_1_l": -198.33709716796875, "logps_train/policy_1_w": -144.35057067871094, "logps_train/policy_2_2": -112.12396240234375, "logps_train/policy_2_w": -161.6400604248047, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 1.8071768283843994, "rewards_train/1-l": -2.7553889751434326, "rewards_train/1-w": 2.5493180751800537, "rewards_train/2-2": 2.41719388961792, "rewards_train/2-w": 1.9031809568405151, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.304707050323486, "rewards_train/margins_1": 0.7421412467956543, "rewards_train/margins_2": 0.5140129327774048, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -168.57284545898438, "logps_train/policy_1_l": -147.60597229003906, "logps_train/policy_1_w": -168.40313720703125, "logps_train/policy_2_2": -131.707763671875, "logps_train/policy_2_w": -234.8583984375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.337636947631836, "rewards_train/1-l": -2.055323600769043, "rewards_train/1-w": 3.369060516357422, "rewards_train/2-2": 2.7690677642822266, "rewards_train/2-w": 0.4204105734825134, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.424384117126465, "rewards_train/margins_1": 2.031423568725586, "rewards_train/margins_2": 2.348657190799713, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -248.3651123046875, "logps_train/policy_1_l": -218.126953125, "logps_train/policy_1_w": -142.5941162109375, "logps_train/policy_2_2": -197.6540069580078, "logps_train/policy_2_w": -191.9463653564453, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.254111886024475, "rewards_train/1-l": -2.187695264816284, "rewards_train/1-w": 3.7780888080596924, "rewards_train/2-2": 3.515850067138672, "rewards_train/2-w": 1.7772377729415894, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.965784072875977, "rewards_train/margins_1": 2.5239769220352173, "rewards_train/margins_2": 1.7386122941970825, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -196.67652893066406, "logps_train/policy_1_l": -252.36888122558594, "logps_train/policy_1_w": -145.3643341064453, "logps_train/policy_2_2": -153.12881469726562, "logps_train/policy_2_w": -187.63662719726562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.1659414768218994, "rewards_train/1-l": -2.9759507179260254, "rewards_train/1-w": 3.3067307472229004, "rewards_train/2-2": 2.8238377571105957, "rewards_train/2-w": 1.8320400714874268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.282681465148926, "rewards_train/margins_1": 2.140789270401001, "rewards_train/margins_2": 0.991797685623169, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -93.86787414550781, "logps_train/policy_1_l": -96.13606262207031, "logps_train/policy_1_w": -53.37836456298828, "logps_train/policy_2_2": -65.31690979003906, "logps_train/policy_2_w": -83.34391021728516, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.1850874423980713, "rewards_train/1-l": -1.5179028511047363, "rewards_train/1-w": 2.195171356201172, "rewards_train/2-2": 2.039402484893799, "rewards_train/2-w": 0.9515464305877686, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.713074207305908, "rewards_train/margins_1": 1.0100839138031006, "rewards_train/margins_2": 1.0878560543060303, "step": 562 }, { "epoch": 1.69, "logps_train/policy_1_2": -77.10850524902344, "logps_train/policy_1_l": -82.11373901367188, "logps_train/policy_1_w": -60.6868896484375, "logps_train/policy_2_2": -53.35625076293945, "logps_train/policy_2_w": -96.12755584716797, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 1.3883683681488037, "rewards_train/1-l": -1.4439911842346191, "rewards_train/1-w": 2.5992798805236816, "rewards_train/2-2": 2.220820188522339, "rewards_train/2-w": 1.0474008321762085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.043271064758301, "rewards_train/margins_1": 1.210911512374878, "rewards_train/margins_2": 1.1734193563461304, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -187.41773986816406, "logps_train/policy_1_l": -161.63417053222656, "logps_train/policy_1_w": -88.55821228027344, "logps_train/policy_2_2": -142.15606689453125, "logps_train/policy_2_w": -118.3850326538086, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.1627187728881836, "rewards_train/1-l": -2.2915427684783936, "rewards_train/1-w": 2.5488662719726562, "rewards_train/2-2": 3.0019707679748535, "rewards_train/2-w": 1.4005590677261353, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.84040904045105, "rewards_train/margins_1": 1.3861474990844727, "rewards_train/margins_2": 1.6014117002487183, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -152.59866333007812, "logps_train/policy_1_l": -149.62095642089844, "logps_train/policy_1_w": -113.77644348144531, "logps_train/policy_2_2": -117.54662322998047, "logps_train/policy_2_w": -146.89297485351562, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 2.3510704040527344, "rewards_train/1-l": -1.5269396305084229, "rewards_train/1-w": 3.689152956008911, "rewards_train/2-2": 3.5203380584716797, "rewards_train/2-w": 2.1958582401275635, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.216092586517334, "rewards_train/margins_1": 1.3380825519561768, "rewards_train/margins_2": 1.3244798183441162, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -181.30007934570312, "logps_train/policy_1_l": -135.78871154785156, "logps_train/policy_1_w": -131.79876708984375, "logps_train/policy_2_2": -125.58903503417969, "logps_train/policy_2_w": -188.12417602539062, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.1574923992156982, "rewards_train/1-l": -2.3429338932037354, "rewards_train/1-w": 3.344341516494751, "rewards_train/2-2": 3.578597068786621, "rewards_train/2-w": 1.151645302772522, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.687275409698486, "rewards_train/margins_1": 2.1868491172790527, "rewards_train/margins_2": 2.426951766014099, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -129.00497436523438, "logps_train/policy_1_l": -136.6365509033203, "logps_train/policy_1_w": -70.16333770751953, "logps_train/policy_2_2": -103.05720520019531, "logps_train/policy_2_w": -106.3798828125, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.5182521343231201, "rewards_train/1-l": -1.5765466690063477, "rewards_train/1-w": 2.5649166107177734, "rewards_train/2-2": 2.629240036010742, "rewards_train/2-w": 1.4463869333267212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.141463279724121, "rewards_train/margins_1": 1.0466644763946533, "rewards_train/margins_2": 1.182853102684021, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -108.46932220458984, "logps_train/policy_1_l": -157.07485961914062, "logps_train/policy_1_w": -47.78023147583008, "logps_train/policy_2_2": -76.63555908203125, "logps_train/policy_2_w": -71.9850082397461, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -87.5, "rewards_train/1-2": 1.148379921913147, "rewards_train/1-l": -3.089564085006714, "rewards_train/1-w": 2.1112351417541504, "rewards_train/2-2": 2.471210479736328, "rewards_train/2-w": 1.5514990091323853, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.200799226760864, "rewards_train/margins_1": 0.9628552198410034, "rewards_train/margins_2": 0.9197114706039429, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -129.48443603515625, "logps_train/policy_1_l": -119.31640625, "logps_train/policy_1_w": -118.19760131835938, "logps_train/policy_2_2": -96.77835845947266, "logps_train/policy_2_w": -157.48324584960938, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.260931134223938, "rewards_train/1-l": -2.0914056301116943, "rewards_train/1-w": 3.1130528450012207, "rewards_train/2-2": 2.660054922103882, "rewards_train/2-w": 1.2946434020996094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.204458475112915, "rewards_train/margins_1": 1.8521217107772827, "rewards_train/margins_2": 1.3654115200042725, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -59.947669982910156, "logps_train/policy_1_l": -62.10752487182617, "logps_train/policy_1_w": -46.981224060058594, "logps_train/policy_2_2": -31.443119049072266, "logps_train/policy_2_w": -73.57955932617188, "logps_train/ref_1_2": -64.5, "logps_train/ref_1_l": -47.75, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -46.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.4833577275276184, "rewards_train/1-l": -1.4285259246826172, "rewards_train/1-w": 2.1452369689941406, "rewards_train/2-2": 1.4502193927764893, "rewards_train/2-w": 0.9787628650665283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.573762893676758, "rewards_train/margins_1": 1.6618792414665222, "rewards_train/margins_2": 0.47145652770996094, "step": 563 }, { "epoch": 1.69, "learning_rate": 3.3493649053890325e-07, "loss": 0.449, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -113.17485046386719, "logps_train/policy_1_l": -105.92796325683594, "logps_train/policy_1_w": -57.890411376953125, "logps_train/policy_2_2": -80.34602355957031, "logps_train/policy_2_w": -78.05936431884766, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.6700153350830078, "rewards_train/1-l": -2.4099841117858887, "rewards_train/1-w": 2.5203335285186768, "rewards_train/2-2": 2.4247727394104004, "rewards_train/2-w": 1.4039266109466553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.930317640304565, "rewards_train/margins_1": 1.850318193435669, "rewards_train/margins_2": 1.0208461284637451, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -184.53857421875, "logps_train/policy_1_l": -162.72119140625, "logps_train/policy_1_w": -135.3743133544922, "logps_train/policy_2_2": -147.65591430664062, "logps_train/policy_2_w": -167.67808532714844, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.2273920774459839, "rewards_train/1-l": -2.438720941543579, "rewards_train/1-w": 3.1016318798065186, "rewards_train/2-2": 3.134408950805664, "rewards_train/2-w": 1.3673491477966309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.540352821350098, "rewards_train/margins_1": 1.8742398023605347, "rewards_train/margins_2": 1.7670598030090332, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -89.4884033203125, "logps_train/policy_1_l": -54.53099822998047, "logps_train/policy_1_w": -43.69780731201172, "logps_train/policy_2_2": -71.05580139160156, "logps_train/policy_2_w": -64.39470672607422, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -43.0, "logps_train/ref_1_w": -61.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.9917846322059631, "rewards_train/1-l": -1.1522209644317627, "rewards_train/1-w": 1.7530708312988281, "rewards_train/2-2": 1.906333565711975, "rewards_train/2-w": 0.5833806991577148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.905291795730591, "rewards_train/margins_1": 0.761286199092865, "rewards_train/margins_2": 1.3229528665542603, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -217.19837951660156, "logps_train/policy_1_l": -222.95663452148438, "logps_train/policy_1_w": -168.395751953125, "logps_train/policy_2_2": -168.97128295898438, "logps_train/policy_2_w": -230.11978149414062, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -203.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.084850549697876, "rewards_train/1-l": -2.906599998474121, "rewards_train/1-w": 3.461205005645752, "rewards_train/2-2": 4.065371513366699, "rewards_train/2-w": 0.8052088022232056, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.367805004119873, "rewards_train/margins_1": 1.376354455947876, "rewards_train/margins_2": 3.2601627111434937, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -121.92350769042969, "logps_train/policy_1_l": -170.74647521972656, "logps_train/policy_1_w": -46.67320251464844, "logps_train/policy_2_2": -83.09223937988281, "logps_train/policy_2_w": -72.38765716552734, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.6711494326591492, "rewards_train/1-l": -2.496108055114746, "rewards_train/1-w": 1.7473528385162354, "rewards_train/2-2": 2.2548391819000244, "rewards_train/2-w": 0.7706096172332764, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.2434608936309814, "rewards_train/margins_1": 1.0762034058570862, "rewards_train/margins_2": 1.484229564666748, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -157.90158081054688, "logps_train/policy_1_l": -194.53160095214844, "logps_train/policy_1_w": -96.85110473632812, "logps_train/policy_2_2": -120.1050796508789, "logps_train/policy_2_w": -130.2752685546875, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6035927534103394, "rewards_train/1-l": -2.2387073040008545, "rewards_train/1-w": 3.1141085624694824, "rewards_train/2-2": 2.9582417011260986, "rewards_train/2-w": 1.7912224531173706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.352815866470337, "rewards_train/margins_1": 1.510515809059143, "rewards_train/margins_2": 1.167019248008728, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -198.91888427734375, "logps_train/policy_1_l": -152.79989624023438, "logps_train/policy_1_w": -152.45004272460938, "logps_train/policy_2_2": -146.70591735839844, "logps_train/policy_2_w": -205.40037536621094, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.9292049407958984, "rewards_train/1-l": -1.4491294622421265, "rewards_train/1-w": 4.234684944152832, "rewards_train/2-2": 4.043471336364746, "rewards_train/2-w": 1.7115248441696167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.6838144063949585, "rewards_train/margins_1": 2.3054800033569336, "rewards_train/margins_2": 2.3319464921951294, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -88.64979553222656, "logps_train/policy_1_l": -128.24395751953125, "logps_train/policy_1_w": -82.81697082519531, "logps_train/policy_2_2": -66.90791320800781, "logps_train/policy_2_w": -107.78499603271484, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.1084096431732178, "rewards_train/1-l": -0.7562320828437805, "rewards_train/1-w": 2.4636154174804688, "rewards_train/2-2": 1.8057904243469238, "rewards_train/2-w": 1.4918129444122314, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.2198475003242493, "rewards_train/margins_1": 1.355205774307251, "rewards_train/margins_2": 0.3139774799346924, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -179.45550537109375, "logps_train/policy_1_l": -222.46466064453125, "logps_train/policy_1_w": -112.00847625732422, "logps_train/policy_2_2": -149.226318359375, "logps_train/policy_2_w": -137.54043579101562, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 2.0794501304626465, "rewards_train/1-l": -3.209747076034546, "rewards_train/1-w": 3.7444653511047363, "rewards_train/2-2": 3.500804901123047, "rewards_train/2-w": 2.956894874572754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.954212427139282, "rewards_train/margins_1": 1.6650152206420898, "rewards_train/margins_2": 0.543910026550293, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -202.44171142578125, "logps_train/policy_1_l": -162.96835327148438, "logps_train/policy_1_w": -132.5072784423828, "logps_train/policy_2_2": -141.3170166015625, "logps_train/policy_2_w": -192.44984436035156, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.8441108465194702, "rewards_train/1-l": -1.428085446357727, "rewards_train/1-w": 3.0602099895477295, "rewards_train/2-2": 3.216930866241455, "rewards_train/2-w": 1.519078016281128, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4882954359054565, "rewards_train/margins_1": 2.2160991430282593, "rewards_train/margins_2": 1.6978528499603271, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -162.60696411132812, "logps_train/policy_1_l": -153.275634765625, "logps_train/policy_1_w": -112.65443420410156, "logps_train/policy_2_2": -122.59181213378906, "logps_train/policy_2_w": -152.7629852294922, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.7799291610717773, "rewards_train/1-l": -1.911548376083374, "rewards_train/1-w": 3.059556722640991, "rewards_train/2-2": 3.354881763458252, "rewards_train/2-w": 1.9565144777297974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.971105098724365, "rewards_train/margins_1": 1.2796275615692139, "rewards_train/margins_2": 1.3983672857284546, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -126.66902923583984, "logps_train/policy_1_l": -133.1602325439453, "logps_train/policy_1_w": -157.73651123046875, "logps_train/policy_2_2": -102.24722290039062, "logps_train/policy_2_w": -186.9728546142578, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -116.5, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.780167579650879, "rewards_train/1-l": -1.6332111358642578, "rewards_train/1-w": 3.643145799636841, "rewards_train/2-2": 2.54168438911438, "rewards_train/2-w": 2.449589252471924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.276356935501099, "rewards_train/margins_1": 1.862978219985962, "rewards_train/margins_2": 0.09209513664245605, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -108.51448059082031, "logps_train/policy_1_l": -128.67050170898438, "logps_train/policy_1_w": -144.90542602539062, "logps_train/policy_2_2": -71.24864959716797, "logps_train/policy_2_w": -210.60821533203125, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.2016770839691162, "rewards_train/1-l": -1.8424408435821533, "rewards_train/1-w": 5.321957588195801, "rewards_train/2-2": 2.29935359954834, "rewards_train/2-w": 2.7977724075317383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.164398431777954, "rewards_train/margins_1": 4.120280504226685, "rewards_train/margins_2": -0.49841880798339844, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -116.22723388671875, "logps_train/policy_1_l": -98.87232208251953, "logps_train/policy_1_w": -98.93028259277344, "logps_train/policy_2_2": -76.71797180175781, "logps_train/policy_2_w": -144.56683349609375, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.7507140636444092, "rewards_train/1-l": -2.0872321128845215, "rewards_train/1-w": 2.366151809692383, "rewards_train/2-2": 2.479374408721924, "rewards_train/2-w": 0.49956709146499634, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.453383922576904, "rewards_train/margins_1": 1.6154377460479736, "rewards_train/margins_2": 1.9798073172569275, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -90.99567413330078, "logps_train/policy_1_l": -122.47091674804688, "logps_train/policy_1_w": -89.83021545410156, "logps_train/policy_2_2": -70.32670593261719, "logps_train/policy_2_w": -133.51181030273438, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.901409387588501, "rewards_train/1-l": -1.6381078958511353, "rewards_train/1-w": 2.5685412883758545, "rewards_train/2-2": 1.4292436838150024, "rewards_train/2-w": 0.41600680351257324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.20664918422699, "rewards_train/margins_1": 1.6671319007873535, "rewards_train/margins_2": 1.0132368803024292, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -137.81497192382812, "logps_train/policy_1_l": -145.474609375, "logps_train/policy_1_w": -90.78687286376953, "logps_train/policy_2_2": -104.37254333496094, "logps_train/policy_2_w": -130.24815368652344, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.349753499031067, "rewards_train/1-l": -2.4701170921325684, "rewards_train/1-w": 3.286937713623047, "rewards_train/2-2": 2.737746000289917, "rewards_train/2-w": 1.8845593929290771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.757054805755615, "rewards_train/margins_1": 1.93718421459198, "rewards_train/margins_2": 0.8531866073608398, "step": 565 }, { "epoch": 1.69, "learning_rate": 3.2269331687794695e-07, "loss": 0.5481, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -176.7093505859375, "logps_train/policy_1_l": -152.3963623046875, "logps_train/policy_1_w": -121.1540298461914, "logps_train/policy_2_2": -127.81379699707031, "logps_train/policy_2_w": -157.85565185546875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.6603145003318787, "rewards_train/1-l": -1.871264100074768, "rewards_train/1-w": 3.8545312881469727, "rewards_train/2-2": 2.4076826572418213, "rewards_train/2-w": 2.348810911178589, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.725795388221741, "rewards_train/margins_1": 3.194216787815094, "rewards_train/margins_2": 0.05887174606323242, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -135.35189819335938, "logps_train/policy_1_l": -169.30926513671875, "logps_train/policy_1_w": -120.20060729980469, "logps_train/policy_2_2": -108.23146057128906, "logps_train/policy_2_w": -148.28298950195312, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.8429360389709473, "rewards_train/1-l": -1.8106154203414917, "rewards_train/1-w": 2.8611903190612793, "rewards_train/2-2": 3.145725727081299, "rewards_train/2-w": 2.159201145172119, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.671805739402771, "rewards_train/margins_1": 1.018254280090332, "rewards_train/margins_2": 0.9865245819091797, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -118.87513732910156, "logps_train/policy_1_l": -115.17601776123047, "logps_train/policy_1_w": -81.61320495605469, "logps_train/policy_2_2": -86.93035125732422, "logps_train/policy_2_w": -112.23175811767578, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.5577982664108276, "rewards_train/1-l": -2.134788990020752, "rewards_train/1-w": 2.3625082969665527, "rewards_train/2-2": 1.8655586242675781, "rewards_train/2-w": 0.8455744981765747, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.497297286987305, "rewards_train/margins_1": 1.804710030555725, "rewards_train/margins_2": 1.0199841260910034, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -148.6895751953125, "logps_train/policy_1_l": -179.773193359375, "logps_train/policy_1_w": -123.89674377441406, "logps_train/policy_2_2": -118.97174072265625, "logps_train/policy_2_w": -164.3162078857422, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1259644031524658, "rewards_train/1-l": -1.988307237625122, "rewards_train/1-w": 2.893235683441162, "rewards_train/2-2": 2.226654291152954, "rewards_train/2-w": 1.5640830993652344, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.881542921066284, "rewards_train/margins_1": 1.7672712802886963, "rewards_train/margins_2": 0.6625711917877197, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -211.39892578125, "logps_train/policy_1_l": -222.69046020507812, "logps_train/policy_1_w": -139.88973999023438, "logps_train/policy_2_2": -176.68727111816406, "logps_train/policy_2_w": -175.7362060546875, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.583496332168579, "rewards_train/1-l": -2.0995142459869385, "rewards_train/1-w": 3.4782137870788574, "rewards_train/2-2": 2.9580302238464355, "rewards_train/2-w": 1.7685670852661133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.577728033065796, "rewards_train/margins_1": 1.8947174549102783, "rewards_train/margins_2": 1.1894631385803223, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -171.03512573242188, "logps_train/policy_1_l": -161.7360076904297, "logps_train/policy_1_w": -102.75189971923828, "logps_train/policy_2_2": -137.27493286132812, "logps_train/policy_2_w": -138.9322967529297, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.3996117115020752, "rewards_train/1-l": -2.257194995880127, "rewards_train/1-w": 2.855278968811035, "rewards_train/2-2": 3.0537567138671875, "rewards_train/2-w": 1.7903645038604736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.112473964691162, "rewards_train/margins_1": 1.45566725730896, "rewards_train/margins_2": 1.2633922100067139, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -87.32147216796875, "logps_train/policy_1_l": -90.95333862304688, "logps_train/policy_1_w": -93.92558288574219, "logps_train/policy_2_2": -62.06045150756836, "logps_train/policy_2_w": -129.0317840576172, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -80.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.9061337113380432, "rewards_train/1-l": -1.1707251071929932, "rewards_train/1-w": 2.905879020690918, "rewards_train/2-2": 1.8259860277175903, "rewards_train/2-w": 1.0655714273452759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.076604127883911, "rewards_train/margins_1": 1.9997453093528748, "rewards_train/margins_2": 0.7604146003723145, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -238.46206665039062, "logps_train/policy_1_l": -204.79371643066406, "logps_train/policy_1_w": -140.44818115234375, "logps_train/policy_2_2": -165.2412567138672, "logps_train/policy_2_w": -192.00941467285156, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.0350431203842163, "rewards_train/1-l": -2.939136028289795, "rewards_train/1-w": 3.1911187171936035, "rewards_train/2-2": 3.455562114715576, "rewards_train/2-w": 1.199057936668396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.130254745483398, "rewards_train/margins_1": 2.156075596809387, "rewards_train/margins_2": 2.25650417804718, "step": 566 }, { "epoch": 1.7, "logps_train/policy_1_2": -181.65396118164062, "logps_train/policy_1_l": -152.290771484375, "logps_train/policy_1_w": -138.0335693359375, "logps_train/policy_2_2": -139.41921997070312, "logps_train/policy_2_w": -173.91229248046875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.7705401182174683, "rewards_train/1-l": -1.9999746084213257, "rewards_train/1-w": 2.897033929824829, "rewards_train/2-2": 3.570577383041382, "rewards_train/2-w": 1.695488452911377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.897008538246155, "rewards_train/margins_1": 1.1264938116073608, "rewards_train/margins_2": 1.8750889301300049, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -137.74679565429688, "logps_train/policy_1_l": -218.01319885253906, "logps_train/policy_1_w": -163.808349609375, "logps_train/policy_2_2": -105.41377258300781, "logps_train/policy_2_w": -217.83920288085938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.037820339202881, "rewards_train/1-l": -3.326320171356201, "rewards_train/1-w": 3.819164991378784, "rewards_train/2-2": 3.0273728370666504, "rewards_train/2-w": 1.564516305923462, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.145485162734985, "rewards_train/margins_1": 1.7813446521759033, "rewards_train/margins_2": 1.4628565311431885, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -94.38372039794922, "logps_train/policy_1_l": -132.18939208984375, "logps_train/policy_1_w": -118.86821746826172, "logps_train/policy_2_2": -71.7932357788086, "logps_train/policy_2_w": -165.73049926757812, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.208503246307373, "rewards_train/1-l": -2.0544867515563965, "rewards_train/1-w": 3.0038037300109863, "rewards_train/2-2": 2.0917704105377197, "rewards_train/2-w": 0.8097630739212036, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.058290481567383, "rewards_train/margins_1": 1.7953004837036133, "rewards_train/margins_2": 1.2820073366165161, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -153.67430114746094, "logps_train/policy_1_l": -154.73191833496094, "logps_train/policy_1_w": -111.08224487304688, "logps_train/policy_2_2": -122.71675109863281, "logps_train/policy_2_w": -143.07618713378906, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.864601492881775, "rewards_train/1-l": -1.7095203399658203, "rewards_train/1-w": 2.9929473400115967, "rewards_train/2-2": 3.2884817123413086, "rewards_train/2-w": 1.644921064376831, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.702467679977417, "rewards_train/margins_1": 1.1283458471298218, "rewards_train/margins_2": 1.6435606479644775, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -134.58615112304688, "logps_train/policy_1_l": -227.78028869628906, "logps_train/policy_1_w": -110.89142608642578, "logps_train/policy_2_2": -103.62644958496094, "logps_train/policy_2_w": -148.9520721435547, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.692167043685913, "rewards_train/1-l": -2.3749043941497803, "rewards_train/1-w": 2.8491387367248535, "rewards_train/2-2": 2.5166516304016113, "rewards_train/2-w": 1.3172931671142578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.224043130874634, "rewards_train/margins_1": 1.1569716930389404, "rewards_train/margins_2": 1.1993584632873535, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -44.78877258300781, "logps_train/policy_1_l": -59.38915252685547, "logps_train/policy_1_w": -45.57758331298828, "logps_train/policy_2_2": -33.090782165527344, "logps_train/policy_2_w": -64.8699722290039, "logps_train/ref_1_2": -52.25, "logps_train/ref_1_l": -46.75, "logps_train/ref_1_w": -63.75, "logps_train/ref_2_2": -45.25, "logps_train/ref_2_w": -76.5, "rewards_train/1-2": 0.7451707124710083, "rewards_train/1-l": -1.2674062252044678, "rewards_train/1-w": 1.816460371017456, "rewards_train/2-2": 1.2306677103042603, "rewards_train/2-w": 1.1489399671554565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.083866596221924, "rewards_train/margins_1": 1.0712896585464478, "rewards_train/margins_2": 0.08172774314880371, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -94.22450256347656, "logps_train/policy_1_l": -89.53821563720703, "logps_train/policy_1_w": -87.90552520751953, "logps_train/policy_2_2": -63.468902587890625, "logps_train/policy_2_w": -128.26051330566406, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 0.9322371482849121, "rewards_train/1-l": -0.7223765850067139, "rewards_train/1-w": 2.095482587814331, "rewards_train/2-2": 2.113558769226074, "rewards_train/2-w": 0.3739493787288666, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.817859172821045, "rewards_train/margins_1": 1.163245439529419, "rewards_train/margins_2": 1.7396093904972076, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -159.1302490234375, "logps_train/policy_1_l": -279.29534912109375, "logps_train/policy_1_w": -130.0694580078125, "logps_train/policy_2_2": -122.90540313720703, "logps_train/policy_2_w": -170.282958984375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.7447868585586548, "rewards_train/1-l": -3.714691162109375, "rewards_train/1-w": 3.602428913116455, "rewards_train/2-2": 2.911022663116455, "rewards_train/2-w": 1.890453815460205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.31712007522583, "rewards_train/margins_1": 1.8576420545578003, "rewards_train/margins_2": 1.02056884765625, "step": 567 }, { "epoch": 1.7, "learning_rate": 3.1066264451090816e-07, "loss": 0.485, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -228.40545654296875, "logps_train/policy_1_l": -138.67852783203125, "logps_train/policy_1_w": -136.75091552734375, "logps_train/policy_2_2": -162.83013916015625, "logps_train/policy_2_w": -193.41317749023438, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.203986406326294, "rewards_train/1-l": -1.8811347484588623, "rewards_train/1-w": 4.116313934326172, "rewards_train/2-2": 4.357610702514648, "rewards_train/2-w": 2.005557060241699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.997448682785034, "rewards_train/margins_1": 2.912327527999878, "rewards_train/margins_2": 2.352053642272949, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -181.34243774414062, "logps_train/policy_1_l": -171.09030151367188, "logps_train/policy_1_w": -82.96414184570312, "logps_train/policy_2_2": -154.33180236816406, "logps_train/policy_2_w": -99.66170501708984, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.6899737119674683, "rewards_train/1-l": -2.1422336101531982, "rewards_train/1-w": 2.9801483154296875, "rewards_train/2-2": 2.6023669242858887, "rewards_train/2-w": 2.294766426086426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.122381925582886, "rewards_train/margins_1": 1.2901746034622192, "rewards_train/margins_2": 0.3076004981994629, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -163.01303100585938, "logps_train/policy_1_l": -170.49143981933594, "logps_train/policy_1_w": -99.67916870117188, "logps_train/policy_2_2": -120.28131866455078, "logps_train/policy_2_w": -151.35269165039062, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.9018216133117676, "rewards_train/1-l": -2.090549945831299, "rewards_train/1-w": 3.3678250312805176, "rewards_train/2-2": 2.9796805381774902, "rewards_train/2-w": 1.6127771139144897, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.458374977111816, "rewards_train/margins_1": 1.46600341796875, "rewards_train/margins_2": 1.3669034242630005, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -159.99732971191406, "logps_train/policy_1_l": -179.8580322265625, "logps_train/policy_1_w": -146.03646850585938, "logps_train/policy_2_2": -117.32915496826172, "logps_train/policy_2_w": -197.79550170898438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.8346412181854248, "rewards_train/1-l": -1.8661017417907715, "rewards_train/1-w": 3.332875967025757, "rewards_train/2-2": 3.323334217071533, "rewards_train/2-w": 1.6800202131271362, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.198977708816528, "rewards_train/margins_1": 1.498234748840332, "rewards_train/margins_2": 1.643314003944397, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -179.1710205078125, "logps_train/policy_1_l": -189.60714721679688, "logps_train/policy_1_w": -127.74793243408203, "logps_train/policy_2_2": -128.4541015625, "logps_train/policy_2_w": -189.8687744140625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2360235452651978, "rewards_train/1-l": -1.7068090438842773, "rewards_train/1-w": 3.5252060890197754, "rewards_train/2-2": 3.00771427154541, "rewards_train/2-w": 1.119372010231018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.232015132904053, "rewards_train/margins_1": 2.2891825437545776, "rewards_train/margins_2": 1.888342261314392, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -175.48341369628906, "logps_train/policy_1_l": -268.9175720214844, "logps_train/policy_1_w": -230.30093383789062, "logps_train/policy_2_2": -138.88134765625, "logps_train/policy_2_w": -299.7967529296875, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -248.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 3.0766592025756836, "rewards_train/1-l": -2.0417585372924805, "rewards_train/1-w": 5.1011552810668945, "rewards_train/2-2": 4.161863803863525, "rewards_train/2-w": 2.3203253746032715, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.142913818359375, "rewards_train/margins_1": 2.024496078491211, "rewards_train/margins_2": 1.841538429260254, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -144.093505859375, "logps_train/policy_1_l": -171.54966735839844, "logps_train/policy_1_w": -137.04478454589844, "logps_train/policy_2_2": -104.43794250488281, "logps_train/policy_2_w": -201.64749145507812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.759399652481079, "rewards_train/1-l": -1.5963736772537231, "rewards_train/1-w": 3.840833902359009, "rewards_train/2-2": 2.8655805587768555, "rewards_train/2-w": 1.7024381160736084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.437207579612732, "rewards_train/margins_1": 2.0814342498779297, "rewards_train/margins_2": 1.163142442703247, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -100.71368408203125, "logps_train/policy_1_l": -100.94654846191406, "logps_train/policy_1_w": -52.473297119140625, "logps_train/policy_2_2": -76.0368423461914, "logps_train/policy_2_w": -76.97084045410156, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 1.3926942348480225, "rewards_train/1-l": -1.7552999258041382, "rewards_train/1-w": 2.524545192718506, "rewards_train/2-2": 2.5674099922180176, "rewards_train/2-w": 1.6333845853805542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.279845118522644, "rewards_train/margins_1": 1.1318509578704834, "rewards_train/margins_2": 0.9340254068374634, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -202.97140502929688, "logps_train/policy_1_l": -184.04212951660156, "logps_train/policy_1_w": -149.9752960205078, "logps_train/policy_2_2": -162.14321899414062, "logps_train/policy_2_w": -191.53562927246094, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.955202341079712, "rewards_train/1-l": -2.566713333129883, "rewards_train/1-w": 3.9934866428375244, "rewards_train/2-2": 3.745054006576538, "rewards_train/2-w": 2.10815691947937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.560199975967407, "rewards_train/margins_1": 2.0382843017578125, "rewards_train/margins_2": 1.636897087097168, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -158.36819458007812, "logps_train/policy_1_l": -166.147705078125, "logps_train/policy_1_w": -86.31063079833984, "logps_train/policy_2_2": -118.742919921875, "logps_train/policy_2_w": -114.29168701171875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.0299779176712036, "rewards_train/1-l": -2.6501216888427734, "rewards_train/1-w": 2.3257720470428467, "rewards_train/2-2": 2.9585208892822266, "rewards_train/2-w": 1.4923148155212402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.97589373588562, "rewards_train/margins_1": 1.295794129371643, "rewards_train/margins_2": 1.4662060737609863, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -111.5210189819336, "logps_train/policy_1_l": -116.50785827636719, "logps_train/policy_1_w": -70.89131164550781, "logps_train/policy_2_2": -78.40345001220703, "logps_train/policy_2_w": -105.9351806640625, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.506491780281067, "rewards_train/1-l": -1.654692530632019, "rewards_train/1-w": 2.9632132053375244, "rewards_train/2-2": 2.560436248779297, "rewards_train/2-w": 1.601793885231018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.6179057359695435, "rewards_train/margins_1": 1.4567214250564575, "rewards_train/margins_2": 0.9586423635482788, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -220.79153442382812, "logps_train/policy_1_l": -159.07164001464844, "logps_train/policy_1_w": -115.06666564941406, "logps_train/policy_2_2": -172.29153442382812, "logps_train/policy_2_w": -167.90968322753906, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.777878761291504, "rewards_train/1-l": -2.128648281097412, "rewards_train/1-w": 3.3364980220794678, "rewards_train/2-2": 3.693502187728882, "rewards_train/2-w": 1.0871573686599731, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.46514630317688, "rewards_train/margins_1": 1.5586192607879639, "rewards_train/margins_2": 2.6063448190689087, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -181.6089324951172, "logps_train/policy_1_l": -265.51239013671875, "logps_train/policy_1_w": -160.03314208984375, "logps_train/policy_2_2": -146.81658935546875, "logps_train/policy_2_w": -198.79542541503906, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.1687941551208496, "rewards_train/1-l": -2.689812421798706, "rewards_train/1-w": 4.124810218811035, "rewards_train/2-2": 3.3964648246765137, "rewards_train/2-w": 2.2603001594543457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.814622640609741, "rewards_train/margins_1": 1.9560160636901855, "rewards_train/margins_2": 1.136164665222168, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -168.82376098632812, "logps_train/policy_1_l": -103.60722351074219, "logps_train/policy_1_w": -71.40481567382812, "logps_train/policy_2_2": -114.76066589355469, "logps_train/policy_2_w": -108.43766021728516, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": 0.8723100423812866, "rewards_train/1-l": -1.6755666732788086, "rewards_train/1-w": 2.5727999210357666, "rewards_train/2-2": 3.3786213397979736, "rewards_train/2-w": 1.331234335899353, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.248366594314575, "rewards_train/margins_1": 1.70048987865448, "rewards_train/margins_2": 2.0473870038986206, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -89.00548553466797, "logps_train/policy_1_l": -95.33494567871094, "logps_train/policy_1_w": -90.65316772460938, "logps_train/policy_2_2": -73.96955108642578, "logps_train/policy_2_w": -116.8060531616211, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.465857982635498, "rewards_train/1-l": -1.348729133605957, "rewards_train/1-w": 2.9956214427948, "rewards_train/2-2": 2.306560516357422, "rewards_train/2-w": 1.6928324699401855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.344350576400757, "rewards_train/margins_1": 1.5297634601593018, "rewards_train/margins_2": 0.6137280464172363, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -83.81956481933594, "logps_train/policy_1_l": -102.9216079711914, "logps_train/policy_1_w": -76.40655517578125, "logps_train/policy_2_2": -60.1191291809082, "logps_train/policy_2_w": -110.88335418701172, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.1989033222198486, "rewards_train/1-l": -1.8554415702819824, "rewards_train/1-w": 2.4929380416870117, "rewards_train/2-2": 2.149024724960327, "rewards_train/2-w": 0.9640088677406311, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.348379611968994, "rewards_train/margins_1": 1.294034719467163, "rewards_train/margins_2": 1.185015857219696, "step": 569 }, { "epoch": 1.71, "learning_rate": 2.9884564761020083e-07, "loss": 0.3124, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -206.82717895507812, "logps_train/policy_1_l": -252.82070922851562, "logps_train/policy_1_w": -200.69204711914062, "logps_train/policy_2_2": -166.31607055664062, "logps_train/policy_2_w": -253.9462890625, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.3827123641967773, "rewards_train/1-l": -3.0660548210144043, "rewards_train/1-w": 4.7776689529418945, "rewards_train/2-2": 3.9400720596313477, "rewards_train/2-w": 2.2389650344848633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.843723773956299, "rewards_train/margins_1": 2.394956588745117, "rewards_train/margins_2": 1.7011070251464844, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -162.11880493164062, "logps_train/policy_1_l": -85.1220474243164, "logps_train/policy_1_w": -77.121826171875, "logps_train/policy_2_2": -113.19229888916016, "logps_train/policy_2_w": -101.93144989013672, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.226401686668396, "rewards_train/1-l": -1.016452670097351, "rewards_train/1-w": 2.9531002044677734, "rewards_train/2-2": 2.9612386226654053, "rewards_train/2-w": 1.7646191120147705, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9695528745651245, "rewards_train/margins_1": 1.7266985177993774, "rewards_train/margins_2": 1.1966195106506348, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -163.99520874023438, "logps_train/policy_1_l": -201.79705810546875, "logps_train/policy_1_w": -157.11184692382812, "logps_train/policy_2_2": -121.66523742675781, "logps_train/policy_2_w": -207.16226196289062, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.5786043405532837, "rewards_train/1-l": -1.3672070503234863, "rewards_train/1-w": 3.40502667427063, "rewards_train/2-2": 2.8834755420684814, "rewards_train/2-w": 1.2374835014343262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.772233724594116, "rewards_train/margins_1": 1.8264223337173462, "rewards_train/margins_2": 1.6459920406341553, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -178.796630859375, "logps_train/policy_1_l": -194.6111602783203, "logps_train/policy_1_w": -124.40542602539062, "logps_train/policy_2_2": -146.62759399414062, "logps_train/policy_2_w": -161.51080322265625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.921900749206543, "rewards_train/1-l": -2.3550610542297363, "rewards_train/1-w": 3.839144468307495, "rewards_train/2-2": 3.0473971366882324, "rewards_train/2-w": 2.3987250328063965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1942055225372314, "rewards_train/margins_1": 1.9172437191009521, "rewards_train/margins_2": 0.6486721038818359, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -164.96432495117188, "logps_train/policy_1_l": -165.14852905273438, "logps_train/policy_1_w": -171.56594848632812, "logps_train/policy_2_2": -123.37355041503906, "logps_train/policy_2_w": -216.446044921875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 1.7004420757293701, "rewards_train/1-l": -1.7879002094268799, "rewards_train/1-w": 3.4746551513671875, "rewards_train/2-2": 3.529832601547241, "rewards_train/2-w": 1.3710216283798218, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.262555360794067, "rewards_train/margins_1": 1.7742130756378174, "rewards_train/margins_2": 2.1588109731674194, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -126.27503967285156, "logps_train/policy_1_l": -109.72943878173828, "logps_train/policy_1_w": -75.74243927001953, "logps_train/policy_2_2": -91.9474105834961, "logps_train/policy_2_w": -104.17231750488281, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.2232778072357178, "rewards_train/1-l": -1.6635688543319702, "rewards_train/1-w": 2.471264123916626, "rewards_train/2-2": 2.599400043487549, "rewards_train/2-w": 1.612748384475708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.134832978248596, "rewards_train/margins_1": 1.2479863166809082, "rewards_train/margins_2": 0.9866516590118408, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -112.38264465332031, "logps_train/policy_1_l": -88.2421875, "logps_train/policy_1_w": -27.722026824951172, "logps_train/policy_2_2": -84.67120361328125, "logps_train/policy_2_w": -51.58232879638672, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -66.5, "logps_train/ref_1_w": -45.25, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -63.5, "rewards_train/1-2": 0.9679849147796631, "rewards_train/1-l": -2.1828126907348633, "rewards_train/1-w": 1.761000394821167, "rewards_train/2-2": 2.0050477981567383, "rewards_train/2-w": 1.1866893768310547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.9438130855560303, "rewards_train/margins_1": 0.7930154800415039, "rewards_train/margins_2": 0.8183584213256836, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -114.68002319335938, "logps_train/policy_1_l": -108.03555297851562, "logps_train/policy_1_w": -74.5701904296875, "logps_train/policy_2_2": -85.50837707519531, "logps_train/policy_2_w": -114.78191375732422, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.4421544075012207, "rewards_train/1-l": -1.4576568603515625, "rewards_train/1-w": 2.924231767654419, "rewards_train/2-2": 2.664005994796753, "rewards_train/2-w": 1.321808934211731, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.3818886280059814, "rewards_train/margins_1": 1.4820773601531982, "rewards_train/margins_2": 1.342197060585022, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -235.3887939453125, "logps_train/policy_1_l": -279.475830078125, "logps_train/policy_1_w": -220.34518432617188, "logps_train/policy_2_2": -182.7156982421875, "logps_train/policy_2_w": -303.52203369140625, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -260.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -320.0, "rewards_train/1-2": 3.2251827716827393, "rewards_train/1-l": -1.922581434249878, "rewards_train/1-w": 4.826224327087402, "rewards_train/2-2": 4.608118534088135, "rewards_train/2-w": 1.6208431720733643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.74880576133728, "rewards_train/margins_1": 1.601041555404663, "rewards_train/margins_2": 2.9872753620147705, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -164.30221557617188, "logps_train/policy_1_l": -203.817138671875, "logps_train/policy_1_w": -175.4609375, "logps_train/policy_2_2": -122.88601684570312, "logps_train/policy_2_w": -236.8404541015625, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.088526964187622, "rewards_train/1-l": -1.8364001512527466, "rewards_train/1-w": 4.174219131469727, "rewards_train/2-2": 3.4785852432250977, "rewards_train/2-w": 2.240954875946045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.010619282722473, "rewards_train/margins_1": 2.0856921672821045, "rewards_train/margins_2": 1.2376303672790527, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -161.40042114257812, "logps_train/policy_1_l": -122.624267578125, "logps_train/policy_1_w": -123.40104675292969, "logps_train/policy_2_2": -124.96693420410156, "logps_train/policy_2_w": -160.84552001953125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.8646451234817505, "rewards_train/1-l": -1.3037351369857788, "rewards_train/1-w": 3.1700520515441895, "rewards_train/2-2": 3.025181770324707, "rewards_train/2-w": 1.7341984510421753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.473787188529968, "rewards_train/margins_1": 1.305406928062439, "rewards_train/margins_2": 1.2909833192825317, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -157.41058349609375, "logps_train/policy_1_l": -199.572998046875, "logps_train/policy_1_w": -113.62103271484375, "logps_train/policy_2_2": -115.65646362304688, "logps_train/policy_2_w": -163.966796875, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.238823413848877, "rewards_train/1-l": -2.2832770347595215, "rewards_train/1-w": 4.207427501678467, "rewards_train/2-2": 2.9206814765930176, "rewards_train/2-w": 2.5830087661743164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.490704536437988, "rewards_train/margins_1": 2.96860408782959, "rewards_train/margins_2": 0.33767271041870117, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -183.30804443359375, "logps_train/policy_1_l": -214.80401611328125, "logps_train/policy_1_w": -171.24087524414062, "logps_train/policy_2_2": -155.49046325683594, "logps_train/policy_2_w": -217.70254516601562, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.562945604324341, "rewards_train/1-l": -2.4288370609283447, "rewards_train/1-w": 3.9977877140045166, "rewards_train/2-2": 3.5447044372558594, "rewards_train/2-w": 2.031309127807617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.426624774932861, "rewards_train/margins_1": 1.4348421096801758, "rewards_train/margins_2": 1.5133953094482422, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -148.2443389892578, "logps_train/policy_1_l": -138.61570739746094, "logps_train/policy_1_w": -82.60987854003906, "logps_train/policy_2_2": -115.8931884765625, "logps_train/policy_2_w": -105.68434143066406, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.48025381565094, "rewards_train/1-l": -1.6370594501495361, "rewards_train/1-w": 2.911668062210083, "rewards_train/2-2": 3.08489990234375, "rewards_train/2-w": 2.1175026893615723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.548727512359619, "rewards_train/margins_1": 1.431414246559143, "rewards_train/margins_2": 0.9673972129821777, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -135.542724609375, "logps_train/policy_1_l": -151.46194458007812, "logps_train/policy_1_w": -141.59359741210938, "logps_train/policy_2_2": -105.91687774658203, "logps_train/policy_2_w": -170.93711853027344, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.886157751083374, "rewards_train/1-l": -2.1532270908355713, "rewards_train/1-w": 2.6795084476470947, "rewards_train/2-2": 2.9897568225860596, "rewards_train/2-w": 1.2182018756866455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.832735538482666, "rewards_train/margins_1": 0.7933506965637207, "rewards_train/margins_2": 1.771554946899414, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -125.21855163574219, "logps_train/policy_1_l": -103.11347961425781, "logps_train/policy_1_w": -61.75391387939453, "logps_train/policy_2_2": -91.57353210449219, "logps_train/policy_2_w": -97.0040283203125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 0.9148631691932678, "rewards_train/1-l": -1.3044359683990479, "rewards_train/1-w": 2.326561450958252, "rewards_train/2-2": 2.2649121284484863, "rewards_train/2-w": 0.9437381625175476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6309974193573, "rewards_train/margins_1": 1.4116982817649841, "rewards_train/margins_2": 1.3211739659309387, "step": 571 }, { "epoch": 1.71, "learning_rate": 2.8724347949389056e-07, "loss": 0.3912, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -163.11827087402344, "logps_train/policy_1_l": -192.3555145263672, "logps_train/policy_1_w": -159.3080291748047, "logps_train/policy_2_2": -113.56582641601562, "logps_train/policy_2_w": -202.4063720703125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.713172435760498, "rewards_train/1-l": -3.037113666534424, "rewards_train/1-w": 3.460603713989258, "rewards_train/2-2": 3.4268157482147217, "rewards_train/2-w": 1.473425030708313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.497717380523682, "rewards_train/margins_1": 1.7474312782287598, "rewards_train/margins_2": 1.9533907175064087, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -153.50686645507812, "logps_train/policy_1_l": -173.21820068359375, "logps_train/policy_1_w": -145.24771118164062, "logps_train/policy_2_2": -117.2183837890625, "logps_train/policy_2_w": -185.46046447753906, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.1040003299713135, "rewards_train/1-l": -2.514789342880249, "rewards_train/1-w": 2.8431990146636963, "rewards_train/2-2": 2.682849407196045, "rewards_train/2-w": 1.0555161237716675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.357988357543945, "rewards_train/margins_1": 1.7391986846923828, "rewards_train/margins_2": 1.6273332834243774, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -197.43492126464844, "logps_train/policy_1_l": -205.30831909179688, "logps_train/policy_1_w": -105.98283386230469, "logps_train/policy_2_2": -152.8255615234375, "logps_train/policy_2_w": -136.3238525390625, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.1861951351165771, "rewards_train/1-l": -2.677903652191162, "rewards_train/1-w": 2.390779733657837, "rewards_train/2-2": 3.3815059661865234, "rewards_train/2-w": 1.3238637447357178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.068683385848999, "rewards_train/margins_1": 1.2045845985412598, "rewards_train/margins_2": 2.0576422214508057, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -142.38958740234375, "logps_train/policy_1_l": -119.93978881835938, "logps_train/policy_1_w": -70.89378356933594, "logps_train/policy_2_2": -103.74812316894531, "logps_train/policy_2_w": -107.19386291503906, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.7836981415748596, "rewards_train/1-l": -2.5695643424987793, "rewards_train/1-w": 2.278590679168701, "rewards_train/2-2": 2.269719362258911, "rewards_train/2-w": 0.7821763753890991, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.8481550216674805, "rewards_train/margins_1": 1.4948925375938416, "rewards_train/margins_2": 1.487542986869812, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -111.4114990234375, "logps_train/policy_1_l": -95.50277709960938, "logps_train/policy_1_w": -82.30875396728516, "logps_train/policy_2_2": -83.3057861328125, "logps_train/policy_2_w": -106.45744323730469, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.3490846157073975, "rewards_train/1-l": -1.5287926197052002, "rewards_train/1-w": 2.5519375801086426, "rewards_train/2-2": 2.548717975616455, "rewards_train/2-w": 1.4136309623718262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.080730199813843, "rewards_train/margins_1": 1.2028529644012451, "rewards_train/margins_2": 1.135087013244629, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -249.89791870117188, "logps_train/policy_1_l": -309.0591735839844, "logps_train/policy_1_w": -234.61904907226562, "logps_train/policy_2_2": -189.78317260742188, "logps_train/policy_2_w": -307.1368713378906, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -282.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -324.0, "rewards_train/1-2": 3.1383328437805176, "rewards_train/1-l": -3.237168788909912, "rewards_train/1-w": 4.825594902038574, "rewards_train/2-2": 5.5935587882995605, "rewards_train/2-w": 1.7925630807876587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.062763690948486, "rewards_train/margins_1": 1.6872620582580566, "rewards_train/margins_2": 3.800995707511902, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -145.30584716796875, "logps_train/policy_1_l": -141.7421112060547, "logps_train/policy_1_w": -113.62711334228516, "logps_train/policy_2_2": -120.73388671875, "logps_train/policy_2_w": -143.11026000976562, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.353790521621704, "rewards_train/1-l": -1.891007900238037, "rewards_train/1-w": 3.1060385704040527, "rewards_train/2-2": 2.547705888748169, "rewards_train/2-w": 1.742099642753601, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.99704647064209, "rewards_train/margins_1": 1.7522480487823486, "rewards_train/margins_2": 0.8056062459945679, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -81.20389556884766, "logps_train/policy_1_l": -82.443115234375, "logps_train/policy_1_w": -62.931976318359375, "logps_train/policy_2_2": -62.407676696777344, "logps_train/policy_2_w": -85.14814758300781, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.0764856338500977, "rewards_train/1-l": -1.2392334938049316, "rewards_train/1-w": 2.2974274158477783, "rewards_train/2-2": 2.124662160873413, "rewards_train/2-w": 1.246903419494629, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.53666090965271, "rewards_train/margins_1": 1.2209417819976807, "rewards_train/margins_2": 0.8777587413787842, "step": 572 }, { "epoch": 1.72, "logps_train/policy_1_2": -137.40457153320312, "logps_train/policy_1_l": -134.71673583984375, "logps_train/policy_1_w": -119.50288391113281, "logps_train/policy_2_2": -113.3265609741211, "logps_train/policy_2_w": -146.68386840820312, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 2.0009493827819824, "rewards_train/1-l": -1.6671802997589111, "rewards_train/1-w": 2.770024299621582, "rewards_train/2-2": 3.155625343322754, "rewards_train/2-w": 1.6667697429656982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.437204599380493, "rewards_train/margins_1": 0.7690749168395996, "rewards_train/margins_2": 1.4888556003570557, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -146.20150756835938, "logps_train/policy_1_l": -145.64089965820312, "logps_train/policy_1_w": -93.8600082397461, "logps_train/policy_2_2": -120.7636947631836, "logps_train/policy_2_w": -118.21891021728516, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.1009416580200195, "rewards_train/1-l": -1.622683048248291, "rewards_train/1-w": 2.8077492713928223, "rewards_train/2-2": 2.0152316093444824, "rewards_train/2-w": 1.5976402759552002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.430432319641113, "rewards_train/margins_1": 1.7068076133728027, "rewards_train/margins_2": 0.4175913333892822, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -220.8806610107422, "logps_train/policy_1_l": -243.01528930664062, "logps_train/policy_1_w": -198.83267211914062, "logps_train/policy_2_2": -172.61534118652344, "logps_train/policy_2_w": -245.893310546875, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 2.147871971130371, "rewards_train/1-l": -3.089811325073242, "rewards_train/1-w": 3.564390182495117, "rewards_train/2-2": 3.7259654998779297, "rewards_train/2-w": 1.2887934446334839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.654201507568359, "rewards_train/margins_1": 1.416518211364746, "rewards_train/margins_2": 2.437172055244446, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -197.19068908691406, "logps_train/policy_1_l": -169.88577270507812, "logps_train/policy_1_w": -141.43258666992188, "logps_train/policy_2_2": -156.49452209472656, "logps_train/policy_2_w": -192.60067749023438, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 2.6309309005737305, "rewards_train/1-l": -2.742190361022949, "rewards_train/1-w": 4.112992286682129, "rewards_train/2-2": 4.1442975997924805, "rewards_train/2-w": 1.8461816310882568, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.855182647705078, "rewards_train/margins_1": 1.4820613861083984, "rewards_train/margins_2": 2.2981159687042236, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -75.55857849121094, "logps_train/policy_1_l": -149.38388061523438, "logps_train/policy_1_w": -100.29435729980469, "logps_train/policy_2_2": -55.16233825683594, "logps_train/policy_2_w": -124.44444274902344, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.2050799131393433, "rewards_train/1-l": -2.6469831466674805, "rewards_train/1-w": 2.1088457107543945, "rewards_train/2-2": 1.975171685218811, "rewards_train/2-w": 1.0118054151535034, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.755828857421875, "rewards_train/margins_1": 0.9037657976150513, "rewards_train/margins_2": 0.9633662700653076, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -171.56982421875, "logps_train/policy_1_l": -150.97447204589844, "logps_train/policy_1_w": -114.3332748413086, "logps_train/policy_2_2": -131.2196807861328, "logps_train/policy_2_w": -144.87428283691406, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 0.9805179238319397, "rewards_train/1-l": -1.8441262245178223, "rewards_train/1-w": 3.269407033920288, "rewards_train/2-2": 2.579594135284424, "rewards_train/2-w": 1.9450912475585938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.11353325843811, "rewards_train/margins_1": 2.2888891100883484, "rewards_train/margins_2": 0.6345028877258301, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -164.42764282226562, "logps_train/policy_1_l": -144.1280059814453, "logps_train/policy_1_w": -107.45576477050781, "logps_train/policy_2_2": -126.14108276367188, "logps_train/policy_2_w": -141.0488739013672, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.7017672061920166, "rewards_train/1-l": -2.5526444911956787, "rewards_train/1-w": 3.2731738090515137, "rewards_train/2-2": 3.396829128265381, "rewards_train/2-w": 1.9658164978027344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.825818300247192, "rewards_train/margins_1": 1.571406602859497, "rewards_train/margins_2": 1.4310126304626465, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -164.51443481445312, "logps_train/policy_1_l": -176.91082763671875, "logps_train/policy_1_w": -155.60690307617188, "logps_train/policy_2_2": -139.1986083984375, "logps_train/policy_2_w": -203.88665771484375, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": 2.050119400024414, "rewards_train/1-l": -2.5084657669067383, "rewards_train/1-w": 4.451809406280518, "rewards_train/2-2": 3.1402945518493652, "rewards_train/2-w": 2.7230522632598877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.960275173187256, "rewards_train/margins_1": 2.4016900062561035, "rewards_train/margins_2": 0.41724228858947754, "step": 573 }, { "epoch": 1.72, "learning_rate": 2.7585727251313196e-07, "loss": 0.5474, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -71.51251983642578, "logps_train/policy_1_l": -84.931640625, "logps_train/policy_1_w": -46.86862564086914, "logps_train/policy_2_2": -47.77327346801758, "logps_train/policy_2_w": -66.47828674316406, "logps_train/ref_1_2": -77.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -64.0, "logps_train/ref_2_2": -63.25, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 0.5359547734260559, "rewards_train/1-l": -1.3361331224441528, "rewards_train/1-w": 1.727590560913086, "rewards_train/2-2": 1.5395182371139526, "rewards_train/2-w": 1.2000231742858887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0637236833572388, "rewards_train/margins_1": 1.19163578748703, "rewards_train/margins_2": 0.33949506282806396, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -88.33414459228516, "logps_train/policy_1_l": -91.55083465576172, "logps_train/policy_1_w": -100.72246551513672, "logps_train/policy_2_2": -57.05730056762695, "logps_train/policy_2_w": -141.69793701171875, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -75.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.6861171722412109, "rewards_train/1-l": -0.9486383199691772, "rewards_train/1-w": 2.1256046295166016, "rewards_train/2-2": 1.7985665798187256, "rewards_train/2-w": 0.31580212712287903, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.074242949485779, "rewards_train/margins_1": 1.4394874572753906, "rewards_train/margins_2": 1.4827644526958466, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -110.17755126953125, "logps_train/policy_1_l": -102.94168090820312, "logps_train/policy_1_w": -82.01258087158203, "logps_train/policy_2_2": -78.12163543701172, "logps_train/policy_2_w": -115.62109375, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 0.9556822776794434, "rewards_train/1-l": -1.7408480644226074, "rewards_train/1-w": 3.026866912841797, "rewards_train/2-2": 2.173969268798828, "rewards_train/2-w": 1.1988279819488525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.767714977264404, "rewards_train/margins_1": 2.0711846351623535, "rewards_train/margins_2": 0.9751412868499756, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -167.8507843017578, "logps_train/policy_1_l": -251.36849975585938, "logps_train/policy_1_w": -173.35983276367188, "logps_train/policy_2_2": -129.78602600097656, "logps_train/policy_2_w": -241.09930419921875, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 2.250858783721924, "rewards_train/1-l": -2.7227869033813477, "rewards_train/1-w": 4.323391914367676, "rewards_train/2-2": 3.4409284591674805, "rewards_train/2-w": 1.7806955575942993, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.046178817749023, "rewards_train/margins_1": 2.072533130645752, "rewards_train/margins_2": 1.6602329015731812, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -118.08391571044922, "logps_train/policy_1_l": -96.6828384399414, "logps_train/policy_1_w": -58.21419143676758, "logps_train/policy_2_2": -92.5931625366211, "logps_train/policy_2_w": -76.82377624511719, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -98.5, "rewards_train/1-2": 1.5923898220062256, "rewards_train/1-l": -2.1315650939941406, "rewards_train/1-w": 2.8563153743743896, "rewards_train/2-2": 2.600351333618164, "rewards_train/2-w": 2.1754350662231445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.98788046836853, "rewards_train/margins_1": 1.263925552368164, "rewards_train/margins_2": 0.42491626739501953, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -150.56015014648438, "logps_train/policy_1_l": -174.392822265625, "logps_train/policy_1_w": -103.87499237060547, "logps_train/policy_2_2": -118.48228454589844, "logps_train/policy_2_w": -130.03822326660156, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.9377350807189941, "rewards_train/1-l": -2.781470775604248, "rewards_train/1-w": 3.4953126907348633, "rewards_train/2-2": 3.4205217361450195, "rewards_train/2-w": 2.228990077972412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.276783466339111, "rewards_train/margins_1": 1.5575776100158691, "rewards_train/margins_2": 1.1915316581726074, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -221.96255493164062, "logps_train/policy_1_l": -328.6705322265625, "logps_train/policy_1_w": -121.61857604980469, "logps_train/policy_2_2": -172.5303955078125, "logps_train/policy_2_w": -166.61331176757812, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -292.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.5303071737289429, "rewards_train/1-l": -3.803382396697998, "rewards_train/1-w": 2.9545481204986572, "rewards_train/2-2": 3.4750864505767822, "rewards_train/2-w": 1.6761687994003296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.757930517196655, "rewards_train/margins_1": 1.4242409467697144, "rewards_train/margins_2": 1.7989176511764526, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -108.9852066040039, "logps_train/policy_1_l": -108.49971771240234, "logps_train/policy_1_w": -115.55830383300781, "logps_train/policy_2_2": -84.6163101196289, "logps_train/policy_2_w": -154.95318603515625, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.4616354703903198, "rewards_train/1-l": -1.598799467086792, "rewards_train/1-w": 2.721513271331787, "rewards_train/2-2": 2.482119083404541, "rewards_train/2-w": 1.2687439918518066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.320312738418579, "rewards_train/margins_1": 1.2598778009414673, "rewards_train/margins_2": 1.2133750915527344, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -234.97161865234375, "logps_train/policy_1_l": -198.33731079101562, "logps_train/policy_1_w": -177.6503143310547, "logps_train/policy_2_2": -193.1722869873047, "logps_train/policy_2_w": -220.91073608398438, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.280963897705078, "rewards_train/1-l": -2.032167911529541, "rewards_train/1-w": 3.805281162261963, "rewards_train/2-2": 3.8921456336975098, "rewards_train/2-w": 1.8886135816574097, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.837449073791504, "rewards_train/margins_1": 1.5243172645568848, "rewards_train/margins_2": 2.0035320520401, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -173.98867797851562, "logps_train/policy_1_l": -170.69198608398438, "logps_train/policy_1_w": -87.84183502197266, "logps_train/policy_2_2": -144.52297973632812, "logps_train/policy_2_w": -120.68475341796875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.9011322259902954, "rewards_train/1-l": -2.0805509090423584, "rewards_train/1-w": 2.6556601524353027, "rewards_train/2-2": 3.1820778846740723, "rewards_train/2-w": 1.7283999919891357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.736211061477661, "rewards_train/margins_1": 0.7545279264450073, "rewards_train/margins_2": 1.4536778926849365, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -110.21128845214844, "logps_train/policy_1_l": -173.40087890625, "logps_train/policy_1_w": -118.36607360839844, "logps_train/policy_2_2": -85.81289672851562, "logps_train/policy_2_w": -152.751708984375, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.40855872631073, "rewards_train/1-l": -2.4334473609924316, "rewards_train/1-w": 2.8946425914764404, "rewards_train/2-2": 2.3382413387298584, "rewards_train/2-w": 1.221703290939331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.328089952468872, "rewards_train/margins_1": 1.4860838651657104, "rewards_train/margins_2": 1.1165380477905273, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -149.1251678466797, "logps_train/policy_1_l": -128.11465454101562, "logps_train/policy_1_w": -94.11469268798828, "logps_train/policy_2_2": -112.6973876953125, "logps_train/policy_2_w": -132.00218200683594, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.8656076192855835, "rewards_train/1-l": -1.7661542892456055, "rewards_train/1-w": 2.829155683517456, "rewards_train/2-2": 3.0083861351013184, "rewards_train/2-w": 1.3794686794281006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5953099727630615, "rewards_train/margins_1": 0.9635480642318726, "rewards_train/margins_2": 1.6289174556732178, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -110.83419799804688, "logps_train/policy_1_l": -154.81094360351562, "logps_train/policy_1_w": -91.57282257080078, "logps_train/policy_2_2": -84.70098114013672, "logps_train/policy_2_w": -118.00140380859375, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.3064243793487549, "rewards_train/1-l": -2.7406656742095947, "rewards_train/1-w": 2.7530691623687744, "rewards_train/2-2": 2.539081573486328, "rewards_train/2-w": 1.47603178024292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.493734836578369, "rewards_train/margins_1": 1.4466447830200195, "rewards_train/margins_2": 1.0630497932434082, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -228.38739013671875, "logps_train/policy_1_l": -223.2294921875, "logps_train/policy_1_w": -143.59756469726562, "logps_train/policy_2_2": -187.2271728515625, "logps_train/policy_2_w": -184.38677978515625, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.129523515701294, "rewards_train/1-l": -1.7640628814697266, "rewards_train/1-w": 3.502255916595459, "rewards_train/2-2": 3.4255237579345703, "rewards_train/2-w": 2.1876883506774902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.2663187980651855, "rewards_train/margins_1": 1.372732400894165, "rewards_train/margins_2": 1.23783540725708, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -90.53916931152344, "logps_train/policy_1_l": -105.91617584228516, "logps_train/policy_1_w": -109.10013580322266, "logps_train/policy_2_2": -66.14625549316406, "logps_train/policy_2_w": -138.1284637451172, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.410145878791809, "rewards_train/1-l": -1.4274570941925049, "rewards_train/1-w": 2.9384238719940186, "rewards_train/2-2": 2.485374927520752, "rewards_train/2-w": 1.481879711151123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.365880966186523, "rewards_train/margins_1": 1.5282779932022095, "rewards_train/margins_2": 1.003495216369629, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -127.58100891113281, "logps_train/policy_1_l": -278.58056640625, "logps_train/policy_1_w": -83.09855651855469, "logps_train/policy_2_2": -94.51200866699219, "logps_train/policy_2_w": -111.97344970703125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.7825233936309814, "rewards_train/1-l": -3.9369630813598633, "rewards_train/1-w": 2.7604570388793945, "rewards_train/2-2": 2.916766881942749, "rewards_train/2-w": 2.0323429107666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.697420120239258, "rewards_train/margins_1": 0.9779336452484131, "rewards_train/margins_2": 0.8844239711761475, "step": 575 }, { "epoch": 1.72, "learning_rate": 2.6468813794165356e-07, "loss": 0.4082, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -167.45074462890625, "logps_train/policy_1_l": -180.80178833007812, "logps_train/policy_1_w": -128.056396484375, "logps_train/policy_2_2": -133.8311004638672, "logps_train/policy_2_w": -171.71107482910156, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.40336275100708, "rewards_train/1-l": -2.525491952896118, "rewards_train/1-w": 3.3185791969299316, "rewards_train/2-2": 2.8356409072875977, "rewards_train/2-w": 1.3585799932479858, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.84407114982605, "rewards_train/margins_1": 1.9152164459228516, "rewards_train/margins_2": 1.4770609140396118, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -139.8717803955078, "logps_train/policy_1_l": -129.1435546875, "logps_train/policy_1_w": -93.97450256347656, "logps_train/policy_2_2": -102.28959655761719, "logps_train/policy_2_w": -116.06761932373047, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.385478138923645, "rewards_train/1-l": -1.6059565544128418, "rewards_train/1-w": 2.497861862182617, "rewards_train/2-2": 2.7212352752685547, "rewards_train/2-w": 1.6318119764328003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.103818416595459, "rewards_train/margins_1": 1.1123837232589722, "rewards_train/margins_2": 1.0894232988357544, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -136.58502197265625, "logps_train/policy_1_l": -121.58660888671875, "logps_train/policy_1_w": -105.14691925048828, "logps_train/policy_2_2": -100.5733871459961, "logps_train/policy_2_w": -133.58941650390625, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.8414968848228455, "rewards_train/1-l": -2.038348436355591, "rewards_train/1-w": 2.564995288848877, "rewards_train/2-2": 2.2766451835632324, "rewards_train/2-w": 1.2754340171813965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.603343725204468, "rewards_train/margins_1": 1.7234984040260315, "rewards_train/margins_2": 1.001211166381836, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -199.28858947753906, "logps_train/policy_1_l": -171.61305236816406, "logps_train/policy_1_w": -101.75839233398438, "logps_train/policy_2_2": -158.00836181640625, "logps_train/policy_2_w": -139.59124755859375, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.6166493892669678, "rewards_train/1-l": -1.5706799030303955, "rewards_train/1-w": 3.0397861003875732, "rewards_train/2-2": 2.98402738571167, "rewards_train/2-w": 1.5658762454986572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.610466003417969, "rewards_train/margins_1": 1.4231367111206055, "rewards_train/margins_2": 1.4181511402130127, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -114.4399642944336, "logps_train/policy_1_l": -101.7279052734375, "logps_train/policy_1_w": -108.6433334350586, "logps_train/policy_2_2": -86.85344696044922, "logps_train/policy_2_w": -144.35853576660156, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.2294411659240723, "rewards_train/1-l": -0.9302617311477661, "rewards_train/1-w": 2.6559791564941406, "rewards_train/2-2": 2.384967803955078, "rewards_train/2-w": 1.2641465663909912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5862408876419067, "rewards_train/margins_1": 1.4265379905700684, "rewards_train/margins_2": 1.120821237564087, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -110.789306640625, "logps_train/policy_1_l": -170.6913299560547, "logps_train/policy_1_w": -114.14962005615234, "logps_train/policy_2_2": -92.7447280883789, "logps_train/policy_2_w": -145.08926391601562, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.8085697889328003, "rewards_train/1-l": -2.192960262298584, "rewards_train/1-w": 2.712381362915039, "rewards_train/2-2": 2.455996036529541, "rewards_train/2-w": 1.4582610130310059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.905341625213623, "rewards_train/margins_1": 0.9038115739822388, "rewards_train/margins_2": 0.9977350234985352, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -304.4637451171875, "logps_train/policy_1_l": -279.411376953125, "logps_train/policy_1_w": -192.90293884277344, "logps_train/policy_2_2": -239.9053497314453, "logps_train/policy_2_w": -256.25640869140625, "logps_train/ref_1_2": -316.0, "logps_train/ref_1_l": -242.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -282.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 1.2405383586883545, "rewards_train/1-l": -3.778832197189331, "rewards_train/1-w": 3.9409563541412354, "rewards_train/2-2": 4.2468671798706055, "rewards_train/2-w": 1.1821696758270264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.719788551330566, "rewards_train/margins_1": 2.700417995452881, "rewards_train/margins_2": 3.064697504043579, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -112.00126647949219, "logps_train/policy_1_l": -113.16617584228516, "logps_train/policy_1_w": -78.34797668457031, "logps_train/policy_2_2": -86.08226013183594, "logps_train/policy_2_w": -99.6636962890625, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 1.3076863288879395, "rewards_train/1-l": -1.849820613861084, "rewards_train/1-w": 2.713639736175537, "rewards_train/2-2": 2.634547710418701, "rewards_train/2-w": 1.5945684909820557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.563460350036621, "rewards_train/margins_1": 1.4059534072875977, "rewards_train/margins_2": 1.0399792194366455, "step": 576 }, { "epoch": 1.73, "logps_train/policy_1_2": -168.46051025390625, "logps_train/policy_1_l": -160.39959716796875, "logps_train/policy_1_w": -149.34371948242188, "logps_train/policy_2_2": -138.13272094726562, "logps_train/policy_2_w": -181.91917419433594, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.6392995119094849, "rewards_train/1-l": -1.7774587869644165, "rewards_train/1-w": 3.398441791534424, "rewards_train/2-2": 2.6989355087280273, "rewards_train/2-w": 2.0252699851989746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.17590057849884, "rewards_train/margins_1": 1.759142279624939, "rewards_train/margins_2": 0.6736655235290527, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -107.24896240234375, "logps_train/policy_1_l": -127.30741882324219, "logps_train/policy_1_w": -52.54181671142578, "logps_train/policy_2_2": -80.71451568603516, "logps_train/policy_2_w": -76.3189468383789, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -70.5, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -86.0, "rewards_train/1-2": 0.6790102124214172, "rewards_train/1-l": -1.9912885427474976, "rewards_train/1-w": 1.808513879776001, "rewards_train/2-2": 2.0168299674987793, "rewards_train/2-w": 0.9667870402336121, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7998024225234985, "rewards_train/margins_1": 1.1295036673545837, "rewards_train/margins_2": 1.0500429272651672, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -253.61317443847656, "logps_train/policy_1_l": -236.94149780273438, "logps_train/policy_1_w": -154.81655883789062, "logps_train/policy_2_2": -198.33358764648438, "logps_train/policy_2_w": -202.69114685058594, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.8074320554733276, "rewards_train/1-l": -2.3582117557525635, "rewards_train/1-w": 3.4290854930877686, "rewards_train/2-2": 3.954141616821289, "rewards_train/2-w": 1.8875263929367065, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.787297248840332, "rewards_train/margins_1": 1.621653437614441, "rewards_train/margins_2": 2.0666152238845825, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -98.87937927246094, "logps_train/policy_1_l": -133.29220581054688, "logps_train/policy_1_w": -103.32119750976562, "logps_train/policy_2_2": -83.59260559082031, "logps_train/policy_2_w": -139.35153198242188, "logps_train/ref_1_2": -114.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.578077793121338, "rewards_train/1-l": -1.480075478553772, "rewards_train/1-w": 2.9737391471862793, "rewards_train/2-2": 2.1399588584899902, "rewards_train/2-w": 1.159377932548523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.453814625740051, "rewards_train/margins_1": 1.3956613540649414, "rewards_train/margins_2": 0.9805809259414673, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -118.66368103027344, "logps_train/policy_1_l": -152.25775146484375, "logps_train/policy_1_w": -98.59394073486328, "logps_train/policy_2_2": -96.46575927734375, "logps_train/policy_2_w": -119.2895278930664, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 1.7824599742889404, "rewards_train/1-l": -2.2368831634521484, "rewards_train/1-w": 2.4240283966064453, "rewards_train/2-2": 2.624908685684204, "rewards_train/2-w": 1.3823752403259277, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.660911560058594, "rewards_train/margins_1": 0.6415684223175049, "rewards_train/margins_2": 1.2425334453582764, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -147.79910278320312, "logps_train/policy_1_l": -123.89483642578125, "logps_train/policy_1_w": -92.38629150390625, "logps_train/policy_2_2": -124.2149658203125, "logps_train/policy_2_w": -126.96955108642578, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.3224326372146606, "rewards_train/1-l": -0.9301086664199829, "rewards_train/1-w": 2.920746326446533, "rewards_train/2-2": 2.2144415378570557, "rewards_train/2-w": 1.7952324151992798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.850854992866516, "rewards_train/margins_1": 1.5983136892318726, "rewards_train/margins_2": 0.4192091226577759, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -190.18048095703125, "logps_train/policy_1_l": -191.1999053955078, "logps_train/policy_1_w": -186.46224975585938, "logps_train/policy_2_2": -165.0513153076172, "logps_train/policy_2_w": -218.39974975585938, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.63820219039917, "rewards_train/1-l": -1.791865587234497, "rewards_train/1-w": 3.405338764190674, "rewards_train/2-2": 3.738618850708008, "rewards_train/2-w": 2.0381507873535156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.197204351425171, "rewards_train/margins_1": 0.7671365737915039, "rewards_train/margins_2": 1.7004680633544922, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -207.74900817871094, "logps_train/policy_1_l": -183.78741455078125, "logps_train/policy_1_w": -128.89334106445312, "logps_train/policy_2_2": -168.98672485351562, "logps_train/policy_2_w": -163.68914794921875, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.5532243251800537, "rewards_train/1-l": -2.2164359092712402, "rewards_train/1-w": 3.1528544425964355, "rewards_train/2-2": 3.0450778007507324, "rewards_train/2-w": 2.03733491897583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.369290351867676, "rewards_train/margins_1": 1.5996301174163818, "rewards_train/margins_2": 1.0077428817749023, "step": 577 }, { "epoch": 1.73, "learning_rate": 2.5373716586730047e-07, "loss": 0.4141, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -173.22695922851562, "logps_train/policy_1_l": -198.75881958007812, "logps_train/policy_1_w": -107.561767578125, "logps_train/policy_2_2": -141.64480590820312, "logps_train/policy_2_w": -130.14398193359375, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.578866958618164, "rewards_train/1-l": -2.384477138519287, "rewards_train/1-w": 2.8313233852386475, "rewards_train/2-2": 2.8683323860168457, "rewards_train/2-w": 1.9910717010498047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.215800523757935, "rewards_train/margins_1": 1.2524564266204834, "rewards_train/margins_2": 0.877260684967041, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -142.42901611328125, "logps_train/policy_1_l": -236.44493103027344, "logps_train/policy_1_w": -129.98532104492188, "logps_train/policy_2_2": -117.27674865722656, "logps_train/policy_2_w": -158.98997497558594, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.9008488655090332, "rewards_train/1-l": -2.6815037727355957, "rewards_train/1-w": 3.01758074760437, "rewards_train/2-2": 2.7957630157470703, "rewards_train/2-w": 2.0151631832122803, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.699084520339966, "rewards_train/margins_1": 1.116731882095337, "rewards_train/margins_2": 0.78059983253479, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -119.30010986328125, "logps_train/policy_1_l": -185.2526397705078, "logps_train/policy_1_w": -110.01868438720703, "logps_train/policy_2_2": -96.284423828125, "logps_train/policy_2_w": -135.8502960205078, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.3234553337097168, "rewards_train/1-l": -1.785809874534607, "rewards_train/1-w": 2.8746938705444336, "rewards_train/2-2": 2.1125736236572266, "rewards_train/2-w": 1.8368451595306396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.6605037450790405, "rewards_train/margins_1": 1.5512385368347168, "rewards_train/margins_2": 0.2757284641265869, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -147.44915771484375, "logps_train/policy_1_l": -183.58834838867188, "logps_train/policy_1_w": -95.0177001953125, "logps_train/policy_2_2": -113.98905944824219, "logps_train/policy_2_w": -130.40782165527344, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.676959753036499, "rewards_train/1-l": -3.2510228157043457, "rewards_train/1-w": 2.3296756744384766, "rewards_train/2-2": 2.099531888961792, "rewards_train/2-w": 1.0842175483703613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.580698490142822, "rewards_train/margins_1": 1.6527159214019775, "rewards_train/margins_2": 1.0153143405914307, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -92.65009307861328, "logps_train/policy_1_l": -83.53240203857422, "logps_train/policy_1_w": -81.69003295898438, "logps_train/policy_2_2": -72.8560562133789, "logps_train/policy_2_w": -106.36652374267578, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.843584418296814, "rewards_train/1-l": -1.0364919900894165, "rewards_train/1-w": 2.1812903881073, "rewards_train/2-2": 1.8495512008666992, "rewards_train/2-w": 0.9758480787277222, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2177823781967163, "rewards_train/margins_1": 1.3377059698104858, "rewards_train/margins_2": 0.873703122138977, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -193.4736785888672, "logps_train/policy_1_l": -251.81964111328125, "logps_train/policy_1_w": -174.35369873046875, "logps_train/policy_2_2": -156.7330322265625, "logps_train/policy_2_w": -236.83880615234375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -215.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.6401317119598389, "rewards_train/1-l": -3.1694650650024414, "rewards_train/1-w": 4.102128982543945, "rewards_train/2-2": 3.051697015762329, "rewards_train/2-w": 1.7473700046539307, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.271594047546387, "rewards_train/margins_1": 2.4619972705841064, "rewards_train/margins_2": 1.3043270111083984, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -155.5872802734375, "logps_train/policy_1_l": -208.95614624023438, "logps_train/policy_1_w": -91.48753356933594, "logps_train/policy_2_2": -135.6497802734375, "logps_train/policy_2_w": -107.95603942871094, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.7475228309631348, "rewards_train/1-l": -3.138583183288574, "rewards_train/1-w": 2.313746690750122, "rewards_train/2-2": 2.722522258758545, "rewards_train/2-w": 1.8817393779754639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.452329874038696, "rewards_train/margins_1": 0.5662238597869873, "rewards_train/margins_2": 0.840782880783081, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -172.80487060546875, "logps_train/policy_1_l": -186.5395050048828, "logps_train/policy_1_w": -198.8170166015625, "logps_train/policy_2_2": -140.81121826171875, "logps_train/policy_2_w": -270.45465087890625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -247.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -288.0, "rewards_train/1-2": 1.649200439453125, "rewards_train/1-l": -1.3010201454162598, "rewards_train/1-w": 4.844861030578613, "rewards_train/2-2": 2.9696598052978516, "rewards_train/2-w": 1.6560980081558228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.145881175994873, "rewards_train/margins_1": 3.1956605911254883, "rewards_train/margins_2": 1.3135617971420288, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -212.663818359375, "logps_train/policy_1_l": -176.9385986328125, "logps_train/policy_1_w": -125.0169448852539, "logps_train/policy_2_2": -175.8065185546875, "logps_train/policy_2_w": -167.12464904785156, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.8898675441741943, "rewards_train/1-l": -1.2335079908370972, "rewards_train/1-w": 3.2498676776885986, "rewards_train/2-2": 3.213099241256714, "rewards_train/2-w": 1.8640973567962646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.483375668525696, "rewards_train/margins_1": 1.3600001335144043, "rewards_train/margins_2": 1.3490018844604492, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -136.84555053710938, "logps_train/policy_1_l": -220.62091064453125, "logps_train/policy_1_w": -122.15554809570312, "logps_train/policy_2_2": -100.31498718261719, "logps_train/policy_2_w": -173.59405517578125, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.3841948509216309, "rewards_train/1-l": -3.4472477436065674, "rewards_train/1-w": 3.1012420654296875, "rewards_train/2-2": 2.721625804901123, "rewards_train/2-w": 1.3640336990356445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.548489809036255, "rewards_train/margins_1": 1.7170472145080566, "rewards_train/margins_2": 1.3575921058654785, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -166.40213012695312, "logps_train/policy_1_l": -281.7917175292969, "logps_train/policy_1_w": -139.41903686523438, "logps_train/policy_2_2": -133.9148406982422, "logps_train/policy_2_w": -180.67138671875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -241.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.9082236289978027, "rewards_train/1-l": -4.080540180206299, "rewards_train/1-w": 3.1721582412719727, "rewards_train/2-2": 3.298750162124634, "rewards_train/2-w": 1.7465330362319946, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.2526984214782715, "rewards_train/margins_1": 1.26393461227417, "rewards_train/margins_2": 1.5522171258926392, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -133.83303833007812, "logps_train/policy_1_l": -152.3148193359375, "logps_train/policy_1_w": -88.83883666992188, "logps_train/policy_2_2": -99.05870056152344, "logps_train/policy_2_w": -134.1222381591797, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.2956023216247559, "rewards_train/1-l": -2.2432000637054443, "rewards_train/1-w": 2.4899444580078125, "rewards_train/2-2": 2.7042858600616455, "rewards_train/2-w": 0.9455879926681519, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.733144521713257, "rewards_train/margins_1": 1.1943421363830566, "rewards_train/margins_2": 1.7586978673934937, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -170.83334350585938, "logps_train/policy_1_l": -122.83607482910156, "logps_train/policy_1_w": -131.33895874023438, "logps_train/policy_2_2": -130.13768005371094, "logps_train/policy_2_w": -170.58193969726562, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.5272116661071777, "rewards_train/1-l": -1.842005729675293, "rewards_train/1-w": 3.1938388347625732, "rewards_train/2-2": 3.490920305252075, "rewards_train/2-w": 1.8011796474456787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.035844564437866, "rewards_train/margins_1": 1.6666271686553955, "rewards_train/margins_2": 1.6897406578063965, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -156.64013671875, "logps_train/policy_1_l": -158.30458068847656, "logps_train/policy_1_w": -140.9307403564453, "logps_train/policy_2_2": -121.89574432373047, "logps_train/policy_2_w": -173.03741455078125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.573485255241394, "rewards_train/1-l": -2.2384655475616455, "rewards_train/1-w": 2.741105318069458, "rewards_train/2-2": 2.815894603729248, "rewards_train/2-w": 1.2698922157287598, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9795708656311035, "rewards_train/margins_1": 1.167620062828064, "rewards_train/margins_2": 1.5460023880004883, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -122.07051086425781, "logps_train/policy_1_l": -142.98046875, "logps_train/policy_1_w": -91.3741455078125, "logps_train/policy_2_2": -94.48149108886719, "logps_train/policy_2_w": -109.18601989746094, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 0.940214216709137, "rewards_train/1-l": -1.911327838897705, "rewards_train/1-w": 1.9504753351211548, "rewards_train/2-2": 2.261420726776123, "rewards_train/2-w": 1.2802255153656006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.86180317401886, "rewards_train/margins_1": 1.0102611184120178, "rewards_train/margins_2": 0.9811952114105225, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -251.08187866210938, "logps_train/policy_1_l": -236.18307495117188, "logps_train/policy_1_w": -213.82257080078125, "logps_train/policy_2_2": -201.27349853515625, "logps_train/policy_2_w": -297.076904296875, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -258.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -306.0, "rewards_train/1-2": 1.8597809076309204, "rewards_train/1-l": -1.8737751245498657, "rewards_train/1-w": 4.480242729187012, "rewards_train/2-2": 4.16718053817749, "rewards_train/2-w": 0.8298097848892212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.354017853736877, "rewards_train/margins_1": 2.6204618215560913, "rewards_train/margins_2": 3.337370753288269, "step": 579 }, { "epoch": 1.74, "learning_rate": 2.430054250856412e-07, "loss": 0.4405, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -246.19827270507812, "logps_train/policy_1_l": -168.05075073242188, "logps_train/policy_1_w": -176.7272186279297, "logps_train/policy_2_2": -198.01968383789062, "logps_train/policy_2_w": -225.32562255859375, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 1.5379853248596191, "rewards_train/1-l": -2.2142534255981445, "rewards_train/1-w": 3.2108726501464844, "rewards_train/2-2": 3.6597514152526855, "rewards_train/2-w": 1.1080639362335205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.425126075744629, "rewards_train/margins_1": 1.6728873252868652, "rewards_train/margins_2": 2.551687479019165, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -176.724609375, "logps_train/policy_1_l": -198.01995849609375, "logps_train/policy_1_w": -149.68954467773438, "logps_train/policy_2_2": -133.61477661132812, "logps_train/policy_2_w": -200.4326934814453, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.5384762287139893, "rewards_train/1-l": -2.634026527404785, "rewards_train/1-w": 3.380265951156616, "rewards_train/2-2": 3.2275853157043457, "rewards_train/2-w": 1.4571216106414795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.014292478561401, "rewards_train/margins_1": 1.841789722442627, "rewards_train/margins_2": 1.7704637050628662, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -217.4793701171875, "logps_train/policy_1_l": -253.8169403076172, "logps_train/policy_1_w": -194.92669677734375, "logps_train/policy_2_2": -170.7402801513672, "logps_train/policy_2_w": -242.26364135742188, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 2.2301878929138184, "rewards_train/1-l": -2.744194746017456, "rewards_train/1-w": 3.7901434898376465, "rewards_train/2-2": 4.0540971755981445, "rewards_train/2-w": 2.000200033187866, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.5343382358551025, "rewards_train/margins_1": 1.5599555969238281, "rewards_train/margins_2": 2.0538971424102783, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -131.95411682128906, "logps_train/policy_1_l": -166.45008850097656, "logps_train/policy_1_w": -53.669212341308594, "logps_train/policy_2_2": -95.89453125, "logps_train/policy_2_w": -72.89204406738281, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -70.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 0.4170890152454376, "rewards_train/1-l": -3.0395402908325195, "rewards_train/1-w": 1.6381570100784302, "rewards_train/2-2": 1.8699216842651367, "rewards_train/2-w": 1.0107953548431396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.67769730091095, "rewards_train/margins_1": 1.2210679948329926, "rewards_train/margins_2": 0.8591263294219971, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -203.11514282226562, "logps_train/policy_1_l": -207.06600952148438, "logps_train/policy_1_w": -102.43318176269531, "logps_train/policy_2_2": -147.75782775878906, "logps_train/policy_2_w": -135.52505493164062, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2697348594665527, "rewards_train/1-l": -2.300351142883301, "rewards_train/1-w": 2.8848066329956055, "rewards_train/2-2": 3.347654342651367, "rewards_train/2-w": 1.6537445783615112, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.185157775878906, "rewards_train/margins_1": 1.6150717735290527, "rewards_train/margins_2": 1.693909764289856, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -111.29422760009766, "logps_train/policy_1_l": -136.25552368164062, "logps_train/policy_1_w": -83.70985412597656, "logps_train/policy_2_2": -87.60244750976562, "logps_train/policy_2_w": -114.92086029052734, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.417452335357666, "rewards_train/1-l": -2.2671539783477783, "rewards_train/1-w": 2.3649516105651855, "rewards_train/2-2": 2.255380153656006, "rewards_train/2-w": 0.8188511729240417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.632105588912964, "rewards_train/margins_1": 0.9474992752075195, "rewards_train/margins_2": 1.4365289807319641, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -248.4174346923828, "logps_train/policy_1_l": -232.51675415039062, "logps_train/policy_1_w": -143.2059326171875, "logps_train/policy_2_2": -202.50045776367188, "logps_train/policy_2_w": -186.91773986816406, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.1012251377105713, "rewards_train/1-l": -2.2137842178344727, "rewards_train/1-w": 4.055968761444092, "rewards_train/2-2": 3.9343295097351074, "rewards_train/2-w": 2.018383026123047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.2697529792785645, "rewards_train/margins_1": 1.9547436237335205, "rewards_train/margins_2": 1.9159464836120605, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -173.9718780517578, "logps_train/policy_1_l": -209.05874633789062, "logps_train/policy_1_w": -154.52386474609375, "logps_train/policy_2_2": -125.34186553955078, "logps_train/policy_2_w": -205.97116088867188, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 0.9590620398521423, "rewards_train/1-l": -2.583510160446167, "rewards_train/1-w": 3.000739336013794, "rewards_train/2-2": 2.850188732147217, "rewards_train/2-w": 0.324759840965271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.584249496459961, "rewards_train/margins_1": 2.0416772961616516, "rewards_train/margins_2": 2.525428891181946, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -153.35296630859375, "logps_train/policy_1_l": -129.8649139404297, "logps_train/policy_1_w": -154.75460815429688, "logps_train/policy_2_2": -123.21920776367188, "logps_train/policy_2_w": -192.6660614013672, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7467340230941772, "rewards_train/1-l": -1.2212570905685425, "rewards_train/1-w": 2.871413230895996, "rewards_train/2-2": 2.6253457069396973, "rewards_train/2-w": 1.089644193649292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.092670321464539, "rewards_train/margins_1": 1.1246792078018188, "rewards_train/margins_2": 1.5357015132904053, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -135.50357055664062, "logps_train/policy_1_l": -113.57179260253906, "logps_train/policy_1_w": -63.13164520263672, "logps_train/policy_2_2": -95.35585021972656, "logps_train/policy_2_w": -86.84979248046875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.4957374930381775, "rewards_train/1-l": -1.7435071468353271, "rewards_train/1-w": 2.3805856704711914, "rewards_train/2-2": 2.535900115966797, "rewards_train/2-w": 1.6072081327438354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.1240928173065186, "rewards_train/margins_1": 1.884848177433014, "rewards_train/margins_2": 0.9286919832229614, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -160.01380920410156, "logps_train/policy_1_l": -145.96519470214844, "logps_train/policy_1_w": -141.00360107421875, "logps_train/policy_2_2": -130.55575561523438, "logps_train/policy_2_w": -184.05380249023438, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 2.0251808166503906, "rewards_train/1-l": -1.6723003387451172, "rewards_train/1-w": 3.5062811374664307, "rewards_train/2-2": 3.3932528495788574, "rewards_train/2-w": 2.1114163398742676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.178581476211548, "rewards_train/margins_1": 1.48110032081604, "rewards_train/margins_2": 1.2818365097045898, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -196.4178466796875, "logps_train/policy_1_l": -225.52566528320312, "logps_train/policy_1_w": -179.2315216064453, "logps_train/policy_2_2": -157.2393798828125, "logps_train/policy_2_w": -223.22377014160156, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 2.103137493133545, "rewards_train/1-l": -1.677566409111023, "rewards_train/1-w": 3.692473888397217, "rewards_train/2-2": 3.529578685760498, "rewards_train/2-w": 1.9729350805282593, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.37004029750824, "rewards_train/margins_1": 1.5893363952636719, "rewards_train/margins_2": 1.5566436052322388, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -155.06890869140625, "logps_train/policy_1_l": -133.80078125, "logps_train/policy_1_w": -109.59576416015625, "logps_train/policy_2_2": -114.39659118652344, "logps_train/policy_2_w": -149.6363525390625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.8931092023849487, "rewards_train/1-l": -2.119337320327759, "rewards_train/1-w": 2.1611268520355225, "rewards_train/2-2": 2.554872751235962, "rewards_train/2-w": 0.8137087821960449, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.280464172363281, "rewards_train/margins_1": 1.2680176496505737, "rewards_train/margins_2": 1.741163969039917, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -146.56878662109375, "logps_train/policy_1_l": -169.26658630371094, "logps_train/policy_1_w": -146.4056396484375, "logps_train/policy_2_2": -106.3840103149414, "logps_train/policy_2_w": -198.98619079589844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.427887201309204, "rewards_train/1-l": -1.6110332012176514, "rewards_train/1-w": 2.987560510635376, "rewards_train/2-2": 2.91198992729187, "rewards_train/2-w": 0.7888813018798828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.598593711853027, "rewards_train/margins_1": 1.5596733093261719, "rewards_train/margins_2": 2.1231086254119873, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -189.2412567138672, "logps_train/policy_1_l": -180.37393188476562, "logps_train/policy_1_w": -91.62406158447266, "logps_train/policy_2_2": -141.02032470703125, "logps_train/policy_2_w": -130.921142578125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.2215783596038818, "rewards_train/1-l": -2.133291482925415, "rewards_train/1-w": 3.2094688415527344, "rewards_train/2-2": 3.1721856594085693, "rewards_train/2-w": 1.8793692588806152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.342760324478149, "rewards_train/margins_1": 1.9878904819488525, "rewards_train/margins_2": 1.292816400527954, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -145.76895141601562, "logps_train/policy_1_l": -215.34652709960938, "logps_train/policy_1_w": -99.34922790527344, "logps_train/policy_2_2": -108.087646484375, "logps_train/policy_2_w": -135.91253662109375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.1840417385101318, "rewards_train/1-l": -3.6983227729797363, "rewards_train/1-w": 2.583827257156372, "rewards_train/2-2": 2.56545352935791, "rewards_train/2-w": 1.3212461471557617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.282150030136108, "rewards_train/margins_1": 1.3997855186462402, "rewards_train/margins_2": 1.2442073822021484, "step": 581 }, { "epoch": 1.74, "learning_rate": 2.3249396299565685e-07, "loss": 0.3941, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -188.89080810546875, "logps_train/policy_1_l": -181.5914306640625, "logps_train/policy_1_w": -138.55178833007812, "logps_train/policy_2_2": -148.82568359375, "logps_train/policy_2_w": -183.576171875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 2.0374813079833984, "rewards_train/1-l": -1.8364858627319336, "rewards_train/1-w": 3.587007522583008, "rewards_train/2-2": 3.6178221702575684, "rewards_train/2-w": 1.7408195734024048, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.423493385314941, "rewards_train/margins_1": 1.5495262145996094, "rewards_train/margins_2": 1.8770025968551636, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -173.5950469970703, "logps_train/policy_1_l": -131.82598876953125, "logps_train/policy_1_w": -189.57919311523438, "logps_train/policy_2_2": -132.38217163085938, "logps_train/policy_2_w": -249.88299560546875, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 1.9218440055847168, "rewards_train/1-l": -1.1456851959228516, "rewards_train/1-w": 4.098428726196289, "rewards_train/2-2": 3.7348296642303467, "rewards_train/2-w": 1.2363104820251465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.244113922119141, "rewards_train/margins_1": 2.1765847206115723, "rewards_train/margins_2": 2.4985191822052, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -130.780517578125, "logps_train/policy_1_l": -177.2645263671875, "logps_train/policy_1_w": -85.5223159790039, "logps_train/policy_2_2": -96.7277603149414, "logps_train/policy_2_w": -138.6983642578125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.8711660504341125, "rewards_train/1-l": -3.4927616119384766, "rewards_train/1-w": 2.6571435928344727, "rewards_train/2-2": 2.292067527770996, "rewards_train/2-w": 0.9989137053489685, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.149905204772949, "rewards_train/margins_1": 1.78597754240036, "rewards_train/margins_2": 1.2931538224220276, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -134.29135131835938, "logps_train/policy_1_l": -183.4678497314453, "logps_train/policy_1_w": -86.95526123046875, "logps_train/policy_2_2": -89.6501235961914, "logps_train/policy_2_w": -117.38249206542969, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.1989905834197998, "rewards_train/1-l": -2.465534210205078, "rewards_train/1-w": 2.350372314453125, "rewards_train/2-2": 2.6865499019622803, "rewards_train/2-w": 1.336359977722168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.815906524658203, "rewards_train/margins_1": 1.1513817310333252, "rewards_train/margins_2": 1.3501899242401123, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -160.80673217773438, "logps_train/policy_1_l": -128.41099548339844, "logps_train/policy_1_w": -136.60076904296875, "logps_train/policy_2_2": -117.41947937011719, "logps_train/policy_2_w": -180.1101531982422, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.3298732042312622, "rewards_train/1-l": -2.623131275177002, "rewards_train/1-w": 3.209453582763672, "rewards_train/2-2": 2.9338326454162598, "rewards_train/2-w": 0.8827349543571472, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.832584857940674, "rewards_train/margins_1": 1.8795803785324097, "rewards_train/margins_2": 2.0510976910591125, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -150.5545196533203, "logps_train/policy_1_l": -182.62368774414062, "logps_train/policy_1_w": -94.37870025634766, "logps_train/policy_2_2": -116.21141052246094, "logps_train/policy_2_w": -120.8330078125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.080046534538269, "rewards_train/1-l": -2.4049947261810303, "rewards_train/1-w": 2.358175277709961, "rewards_train/2-2": 2.537843704223633, "rewards_train/2-w": 1.316699504852295, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.763170003890991, "rewards_train/margins_1": 1.278128743171692, "rewards_train/margins_2": 1.221144199371338, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -120.03022766113281, "logps_train/policy_1_l": -213.5609130859375, "logps_train/policy_1_w": -155.80911254882812, "logps_train/policy_2_2": -99.5838394165039, "logps_train/policy_2_w": -191.18997192382812, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.9657275676727295, "rewards_train/1-l": -2.7842164039611816, "rewards_train/1-w": 3.6440887451171875, "rewards_train/2-2": 2.73224139213562, "rewards_train/2-w": 2.3685028553009033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.428305149078369, "rewards_train/margins_1": 1.678361177444458, "rewards_train/margins_2": 0.3637385368347168, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -214.2769012451172, "logps_train/policy_1_l": -195.18124389648438, "logps_train/policy_1_w": -152.68984985351562, "logps_train/policy_2_2": -174.86366271972656, "logps_train/policy_2_w": -190.93508911132812, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 2.3035597801208496, "rewards_train/1-l": -2.3681249618530273, "rewards_train/1-w": 3.6044530868530273, "rewards_train/2-2": 4.0261335372924805, "rewards_train/2-w": 2.6768033504486084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.972578048706055, "rewards_train/margins_1": 1.3008933067321777, "rewards_train/margins_2": 1.349330186843872, "step": 582 }, { "epoch": 1.75, "logps_train/policy_1_2": -101.9356460571289, "logps_train/policy_1_l": -61.97941207885742, "logps_train/policy_1_w": -80.47796630859375, "logps_train/policy_2_2": -74.85100555419922, "logps_train/policy_2_w": -116.36116027832031, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -50.25, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.6845608353614807, "rewards_train/1-l": -1.152823805809021, "rewards_train/1-w": 2.677593231201172, "rewards_train/2-2": 1.7547435760498047, "rewards_train/2-w": 1.3830245733261108, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.830417037010193, "rewards_train/margins_1": 1.9930323958396912, "rewards_train/margins_2": 0.37171900272369385, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -208.6841583251953, "logps_train/policy_1_l": -271.826171875, "logps_train/policy_1_w": -156.87130737304688, "logps_train/policy_2_2": -182.3206024169922, "logps_train/policy_2_w": -190.53688049316406, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 2.5893967151641846, "rewards_train/1-l": -3.1413092613220215, "rewards_train/1-w": 4.225368499755859, "rewards_train/2-2": 3.669893264770508, "rewards_train/2-w": 2.690061092376709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 7.366677761077881, "rewards_train/margins_1": 1.6359717845916748, "rewards_train/margins_2": 0.9798321723937988, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -220.46665954589844, "logps_train/policy_1_l": -191.57403564453125, "logps_train/policy_1_w": -103.33277893066406, "logps_train/policy_2_2": -167.69020080566406, "logps_train/policy_2_w": -144.08921813964844, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.9814602136611938, "rewards_train/1-l": -2.4320125579833984, "rewards_train/1-w": 3.7788314819335938, "rewards_train/2-2": 3.3919169902801514, "rewards_train/2-w": 2.271547317504883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.210844039916992, "rewards_train/margins_1": 2.7973712682724, "rewards_train/margins_2": 1.1203696727752686, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -113.31961059570312, "logps_train/policy_1_l": -148.7238311767578, "logps_train/policy_1_w": -123.3740234375, "logps_train/policy_2_2": -91.66688537597656, "logps_train/policy_2_w": -145.18307495117188, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.282882571220398, "rewards_train/1-l": -1.685273289680481, "rewards_train/1-w": 2.333690643310547, "rewards_train/2-2": 2.082920551300049, "rewards_train/2-w": 1.4684107303619385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.018963932991028, "rewards_train/margins_1": 1.050808072090149, "rewards_train/margins_2": 0.6145098209381104, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -180.60423278808594, "logps_train/policy_1_l": -195.15603637695312, "logps_train/policy_1_w": -132.4539794921875, "logps_train/policy_2_2": -151.67391967773438, "logps_train/policy_2_w": -166.91036987304688, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5231703519821167, "rewards_train/1-l": -3.413259506225586, "rewards_train/1-w": 3.082726240158081, "rewards_train/2-2": 2.85370135307312, "rewards_train/2-w": 1.5308387279510498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.495985746383667, "rewards_train/margins_1": 1.5595558881759644, "rewards_train/margins_2": 1.3228626251220703, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -112.55591583251953, "logps_train/policy_1_l": -170.32257080078125, "logps_train/policy_1_w": -72.50572204589844, "logps_train/policy_2_2": -88.11634063720703, "logps_train/policy_2_w": -91.17233276367188, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -91.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 1.260814905166626, "rewards_train/1-l": -2.1935839653015137, "rewards_train/1-w": 1.8961079120635986, "rewards_train/2-2": 2.4563345909118652, "rewards_train/2-w": 1.1940951347351074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.089691877365112, "rewards_train/margins_1": 0.6352930068969727, "rewards_train/margins_2": 1.2622394561767578, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -159.04388427734375, "logps_train/policy_1_l": -151.51913452148438, "logps_train/policy_1_w": -86.00849151611328, "logps_train/policy_2_2": -125.46669006347656, "logps_train/policy_2_w": -127.71218872070312, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.7893624305725098, "rewards_train/1-l": -2.486288547515869, "rewards_train/1-w": 3.098369598388672, "rewards_train/2-2": 3.140831470489502, "rewards_train/2-w": 1.4084683656692505, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.584658145904541, "rewards_train/margins_1": 1.309007167816162, "rewards_train/margins_2": 1.7323631048202515, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -250.50741577148438, "logps_train/policy_1_l": -272.96258544921875, "logps_train/policy_1_w": -174.91653442382812, "logps_train/policy_2_2": -188.21730041503906, "logps_train/policy_2_w": -232.04019165039062, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -245.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.6969139575958252, "rewards_train/1-l": -2.852508544921875, "rewards_train/1-w": 4.547408103942871, "rewards_train/2-2": 4.600924968719482, "rewards_train/2-w": 2.7373883724212646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.399916648864746, "rewards_train/margins_1": 2.850494146347046, "rewards_train/margins_2": 1.8635365962982178, "step": 583 }, { "epoch": 1.75, "learning_rate": 2.222038054975173e-07, "loss": 0.3672, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -86.79903411865234, "logps_train/policy_1_l": -56.062679290771484, "logps_train/policy_1_w": -55.71754455566406, "logps_train/policy_2_2": -57.210147857666016, "logps_train/policy_2_w": -88.79747009277344, "logps_train/ref_1_2": -93.0, "logps_train/ref_1_l": -53.75, "logps_train/ref_1_w": -75.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 0.6005654335021973, "rewards_train/1-l": -0.23595507442951202, "rewards_train/1-w": 1.9794175624847412, "rewards_train/2-2": 1.7016414403915405, "rewards_train/2-w": 0.7838262915611267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2153726369142532, "rewards_train/margins_1": 1.378852128982544, "rewards_train/margins_2": 0.9178151488304138, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -103.90658569335938, "logps_train/policy_1_l": -143.6116943359375, "logps_train/policy_1_w": -151.0574951171875, "logps_train/policy_2_2": -78.64569091796875, "logps_train/policy_2_w": -193.53733825683594, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.8495752811431885, "rewards_train/1-l": -2.1740610599517822, "rewards_train/1-w": 2.8536269664764404, "rewards_train/2-2": 2.5643367767333984, "rewards_train/2-w": 0.8361092805862427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.027688026428223, "rewards_train/margins_1": 1.004051685333252, "rewards_train/margins_2": 1.7282274961471558, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -122.16671752929688, "logps_train/policy_1_l": -131.15765380859375, "logps_train/policy_1_w": -90.77766418457031, "logps_train/policy_2_2": -90.76039123535156, "logps_train/policy_2_w": -129.14178466796875, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 2.134890556335449, "rewards_train/1-l": -1.7717232704162598, "rewards_train/1-w": 3.0636391639709473, "rewards_train/2-2": 3.2684922218322754, "rewards_train/2-w": 1.6334773302078247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.835362434387207, "rewards_train/margins_1": 0.928748607635498, "rewards_train/margins_2": 1.6350148916244507, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -186.9751739501953, "logps_train/policy_1_l": -248.7117462158203, "logps_train/policy_1_w": -135.1588134765625, "logps_train/policy_2_2": -140.83041381835938, "logps_train/policy_2_w": -198.36196899414062, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.9649823904037476, "rewards_train/1-l": -2.4942214488983154, "rewards_train/1-w": 3.9411497116088867, "rewards_train/2-2": 3.745083808898926, "rewards_train/2-w": 2.116928815841675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.435371160507202, "rewards_train/margins_1": 1.9761673212051392, "rewards_train/margins_2": 1.628154993057251, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -148.71160888671875, "logps_train/policy_1_l": -114.94274139404297, "logps_train/policy_1_w": -228.50209045410156, "logps_train/policy_2_2": -116.39315795898438, "logps_train/policy_2_w": -275.6556396484375, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": 1.8460265398025513, "rewards_train/1-l": -0.6215198040008545, "rewards_train/1-w": 3.5846548080444336, "rewards_train/2-2": 2.780606269836426, "rewards_train/2-w": 1.401623010635376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.206174612045288, "rewards_train/margins_1": 1.7386282682418823, "rewards_train/margins_2": 1.3789832592010498, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -202.5434112548828, "logps_train/policy_1_l": -140.70767211914062, "logps_train/policy_1_w": -139.739013671875, "logps_train/policy_2_2": -163.83297729492188, "logps_train/policy_2_w": -180.13096618652344, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 2.1675338745117188, "rewards_train/1-l": -0.8207675218582153, "rewards_train/1-w": 4.041723251342773, "rewards_train/2-2": 3.8252968788146973, "rewards_train/2-w": 2.2267470359802246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.862490773200989, "rewards_train/margins_1": 1.8741893768310547, "rewards_train/margins_2": 1.5985498428344727, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -217.53048706054688, "logps_train/policy_1_l": -244.328857421875, "logps_train/policy_1_w": -175.89427185058594, "logps_train/policy_2_2": -182.72604370117188, "logps_train/policy_2_w": -219.67071533203125, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 2.7000763416290283, "rewards_train/1-l": -1.7151126861572266, "rewards_train/1-w": 4.052760601043701, "rewards_train/2-2": 4.128958702087402, "rewards_train/2-w": 2.414179563522339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.767873287200928, "rewards_train/margins_1": 1.3526842594146729, "rewards_train/margins_2": 1.7147791385650635, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -183.95846557617188, "logps_train/policy_1_l": -234.52528381347656, "logps_train/policy_1_w": -139.51722717285156, "logps_train/policy_2_2": -137.0585174560547, "logps_train/policy_2_w": -187.1199951171875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.9729044437408447, "rewards_train/1-l": -3.884168863296509, "rewards_train/1-w": 3.526402711868286, "rewards_train/2-2": 3.444148063659668, "rewards_train/2-w": 1.4005012512207031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.410571575164795, "rewards_train/margins_1": 1.5534982681274414, "rewards_train/margins_2": 2.043646812438965, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -147.3093719482422, "logps_train/policy_1_l": -140.19021606445312, "logps_train/policy_1_w": -69.5050048828125, "logps_train/policy_2_2": -123.88060760498047, "logps_train/policy_2_w": -89.26348876953125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.857344388961792, "rewards_train/1-l": -1.104178547859192, "rewards_train/1-w": 2.9541869163513184, "rewards_train/2-2": 2.115064859390259, "rewards_train/2-w": 2.019744873046875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.05836546421051, "rewards_train/margins_1": 2.0968425273895264, "rewards_train/margins_2": 0.09531998634338379, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -132.7027587890625, "logps_train/policy_1_l": -269.0765686035156, "logps_train/policy_1_w": -124.82445526123047, "logps_train/policy_2_2": -109.86215209960938, "logps_train/policy_2_w": -166.1433868408203, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.9387086629867554, "rewards_train/1-l": -4.032069683074951, "rewards_train/1-w": 2.8142340183258057, "rewards_train/2-2": 2.842104911804199, "rewards_train/2-w": 1.7864415645599365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.846303701400757, "rewards_train/margins_1": 0.8755253553390503, "rewards_train/margins_2": 1.0556633472442627, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -81.02548217773438, "logps_train/policy_1_l": -90.24864959716797, "logps_train/policy_1_w": -62.445343017578125, "logps_train/policy_2_2": -56.807559967041016, "logps_train/policy_2_w": -92.3946762084961, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.43924832344055176, "rewards_train/1-l": -1.5623645782470703, "rewards_train/1-w": 1.9945282936096191, "rewards_train/2-2": 1.7616267204284668, "rewards_train/2-w": 0.5230321884155273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5568928718566895, "rewards_train/margins_1": 1.5552799701690674, "rewards_train/margins_2": 1.2385945320129395, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -223.32839965820312, "logps_train/policy_1_l": -134.94464111328125, "logps_train/policy_1_w": -127.74369049072266, "logps_train/policy_2_2": -188.2344970703125, "logps_train/policy_2_w": -150.05572509765625, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.1874727010726929, "rewards_train/1-l": -1.2010074853897095, "rewards_train/1-w": 2.5596158504486084, "rewards_train/2-2": 2.8984251022338867, "rewards_train/2-w": 1.8709893226623535, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.760623335838318, "rewards_train/margins_1": 1.3721431493759155, "rewards_train/margins_2": 1.0274357795715332, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -115.86808776855469, "logps_train/policy_1_l": -113.06109619140625, "logps_train/policy_1_w": -69.28478240966797, "logps_train/policy_2_2": -81.13310241699219, "logps_train/policy_2_w": -94.1785659790039, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.106550693511963, "rewards_train/1-l": -1.614703893661499, "rewards_train/1-w": 2.61917781829834, "rewards_train/2-2": 2.802314281463623, "rewards_train/2-w": 1.7317531108856201, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.233881711959839, "rewards_train/margins_1": 1.512627124786377, "rewards_train/margins_2": 1.070561170578003, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -155.92611694335938, "logps_train/policy_1_l": -168.15902709960938, "logps_train/policy_1_w": -141.9556884765625, "logps_train/policy_2_2": -119.85067749023438, "logps_train/policy_2_w": -190.50253295898438, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.9167640209197998, "rewards_train/1-l": -2.357308864593506, "rewards_train/1-w": 3.483337879180908, "rewards_train/2-2": 2.936807155609131, "rewards_train/2-w": 1.6606837511062622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.840646743774414, "rewards_train/margins_1": 1.5665738582611084, "rewards_train/margins_2": 1.2761234045028687, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -59.119659423828125, "logps_train/policy_1_l": -115.2650375366211, "logps_train/policy_1_w": -98.75076293945312, "logps_train/policy_2_2": -35.69284439086914, "logps_train/policy_2_w": -130.1142120361328, "logps_train/ref_1_2": -67.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -54.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.8138154149055481, "rewards_train/1-l": -1.9186911582946777, "rewards_train/1-w": 2.2585177421569824, "rewards_train/2-2": 1.8490748405456543, "rewards_train/2-w": 0.407329261302948, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.17720890045166, "rewards_train/margins_1": 1.4447023272514343, "rewards_train/margins_2": 1.4417455792427063, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -150.98497009277344, "logps_train/policy_1_l": -156.86019897460938, "logps_train/policy_1_w": -146.1512908935547, "logps_train/policy_2_2": -118.09353637695312, "logps_train/policy_2_w": -175.6608123779297, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.9671282768249512, "rewards_train/1-l": -1.6493021249771118, "rewards_train/1-w": 3.5723717212677, "rewards_train/2-2": 3.0812714099884033, "rewards_train/2-w": 2.083919048309326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.221673846244812, "rewards_train/margins_1": 1.605243444442749, "rewards_train/margins_2": 0.9973523616790771, "step": 585 }, { "epoch": 1.75, "learning_rate": 2.1213595689245386e-07, "loss": 0.4179, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -248.26173400878906, "logps_train/policy_1_l": -181.56797790527344, "logps_train/policy_1_w": -153.6826171875, "logps_train/policy_2_2": -196.5958709716797, "logps_train/policy_2_w": -190.98692321777344, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.7363251447677612, "rewards_train/1-l": -1.6419546604156494, "rewards_train/1-w": 3.8043951988220215, "rewards_train/2-2": 4.181037425994873, "rewards_train/2-w": 2.388807773590088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.446349859237671, "rewards_train/margins_1": 2.0680700540542603, "rewards_train/margins_2": 1.7922296524047852, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -199.35494995117188, "logps_train/policy_1_l": -207.17361450195312, "logps_train/policy_1_w": -139.44583129882812, "logps_train/policy_2_2": -154.8018341064453, "logps_train/policy_2_w": -195.027099609375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 0.9857955574989319, "rewards_train/1-l": -3.2094521522521973, "rewards_train/1-w": 4.076902389526367, "rewards_train/2-2": 2.965702533721924, "rewards_train/2-w": 1.9767813682556152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.2863545417785645, "rewards_train/margins_1": 3.0911068320274353, "rewards_train/margins_2": 0.9889211654663086, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -170.93679809570312, "logps_train/policy_1_l": -112.04825592041016, "logps_train/policy_1_w": -77.81424713134766, "logps_train/policy_2_2": -126.55203247070312, "logps_train/policy_2_w": -96.10948181152344, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.6602264642715454, "rewards_train/1-l": -1.2543375492095947, "rewards_train/1-w": 2.020918846130371, "rewards_train/2-2": 2.911985397338867, "rewards_train/2-w": 1.2988173961639404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.275256395339966, "rewards_train/margins_1": 1.3606923818588257, "rewards_train/margins_2": 1.6131680011749268, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -143.38223266601562, "logps_train/policy_1_l": -155.02828979492188, "logps_train/policy_1_w": -132.71426391601562, "logps_train/policy_2_2": -114.7916259765625, "logps_train/policy_2_w": -184.33978271484375, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.6297454833984375, "rewards_train/1-l": -1.2969697713851929, "rewards_train/1-w": 3.4566988945007324, "rewards_train/2-2": 2.842712640762329, "rewards_train/2-w": 1.664459228515625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.753668665885925, "rewards_train/margins_1": 1.826953411102295, "rewards_train/margins_2": 1.178253412246704, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -133.41929626464844, "logps_train/policy_1_l": -185.2509765625, "logps_train/policy_1_w": -142.40170288085938, "logps_train/policy_2_2": -107.45724487304688, "logps_train/policy_2_w": -173.02639770507812, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.265883445739746, "rewards_train/1-l": -2.7651379108428955, "rewards_train/1-w": 2.8442044258117676, "rewards_train/2-2": 3.3011510372161865, "rewards_train/2-w": 1.5957973003387451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.609342336654663, "rewards_train/margins_1": 0.5783209800720215, "rewards_train/margins_2": 1.7053537368774414, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -141.27700805664062, "logps_train/policy_1_l": -188.77493286132812, "logps_train/policy_1_w": -64.71202087402344, "logps_train/policy_2_2": -102.270751953125, "logps_train/policy_2_w": -88.06940460205078, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.5230811834335327, "rewards_train/1-l": -1.7483930587768555, "rewards_train/1-w": 1.9452040195465088, "rewards_train/2-2": 2.1322996616363525, "rewards_train/2-w": 1.0114188194274902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6935970783233643, "rewards_train/margins_1": 1.422122836112976, "rewards_train/margins_2": 1.1208808422088623, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -120.60149383544922, "logps_train/policy_1_l": -50.203216552734375, "logps_train/policy_1_w": -90.63352966308594, "logps_train/policy_2_2": -86.66459655761719, "logps_train/policy_2_w": -123.75106811523438, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -46.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.9761782884597778, "rewards_train/1-l": -0.4234955310821533, "rewards_train/1-w": 2.6884050369262695, "rewards_train/2-2": 2.470649480819702, "rewards_train/2-w": 1.2952064275741577, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.111900568008423, "rewards_train/margins_1": 1.7122267484664917, "rewards_train/margins_2": 1.1754430532455444, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -162.6475372314453, "logps_train/policy_1_l": -178.48663330078125, "logps_train/policy_1_w": -87.62864685058594, "logps_train/policy_2_2": -111.31037902832031, "logps_train/policy_2_w": -123.08226013183594, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 1.360245943069458, "rewards_train/1-l": -2.933037757873535, "rewards_train/1-w": 2.605104446411133, "rewards_train/2-2": 2.929898738861084, "rewards_train/2-w": 1.2355239391326904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.538142204284668, "rewards_train/margins_1": 1.2448585033416748, "rewards_train/margins_2": 1.6943747997283936, "step": 586 }, { "epoch": 1.76, "logps_train/policy_1_2": -76.38601684570312, "logps_train/policy_1_l": -109.74195861816406, "logps_train/policy_1_w": -45.4628791809082, "logps_train/policy_2_2": -41.87505340576172, "logps_train/policy_2_w": -91.98274993896484, "logps_train/ref_1_2": -83.5, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -100.0, "rewards_train/1-2": 0.7098362445831299, "rewards_train/1-l": -1.7620861530303955, "rewards_train/1-w": 2.029688596725464, "rewards_train/2-2": 2.319526195526123, "rewards_train/2-w": 0.8173501491546631, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7917747497558594, "rewards_train/margins_1": 1.319852352142334, "rewards_train/margins_2": 1.50217604637146, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -216.7882537841797, "logps_train/policy_1_l": -161.9227294921875, "logps_train/policy_1_w": -78.3072509765625, "logps_train/policy_2_2": -181.84878540039062, "logps_train/policy_2_w": -103.61681365966797, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.8852373361587524, "rewards_train/1-l": -2.393444776535034, "rewards_train/1-w": 3.03958797454834, "rewards_train/2-2": 3.8776211738586426, "rewards_train/2-w": 1.9398804903030396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.433032751083374, "rewards_train/margins_1": 1.1543506383895874, "rewards_train/margins_2": 1.937740683555603, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -75.72756958007812, "logps_train/policy_1_l": -67.29793548583984, "logps_train/policy_1_w": -51.465572357177734, "logps_train/policy_2_2": -53.005409240722656, "logps_train/policy_2_w": -69.281982421875, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -50.5, "logps_train/ref_1_w": -68.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -77.5, "rewards_train/1-2": 0.9342741966247559, "rewards_train/1-l": -1.6787195205688477, "rewards_train/1-w": 1.6968019008636475, "rewards_train/2-2": 1.719771385192871, "rewards_train/2-w": 0.8186768293380737, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.375521421432495, "rewards_train/margins_1": 0.7625277042388916, "rewards_train/margins_2": 0.9010945558547974, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -97.66004943847656, "logps_train/policy_1_l": -159.97044372558594, "logps_train/policy_1_w": -49.1330680847168, "logps_train/policy_2_2": -74.31657409667969, "logps_train/policy_2_w": -65.13639068603516, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 1.202744960784912, "rewards_train/1-l": -3.468284845352173, "rewards_train/1-w": 2.6241931915283203, "rewards_train/2-2": 2.044123888015747, "rewards_train/2-w": 1.8371422290802002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.092478036880493, "rewards_train/margins_1": 1.4214482307434082, "rewards_train/margins_2": 0.20698165893554688, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -175.51327514648438, "logps_train/policy_1_l": -204.78465270996094, "logps_train/policy_1_w": -123.75564575195312, "logps_train/policy_2_2": -123.67220306396484, "logps_train/policy_2_w": -171.09625244140625, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.1486730575561523, "rewards_train/1-l": -2.684519052505493, "rewards_train/1-w": 3.849436044692993, "rewards_train/2-2": 3.9421544075012207, "rewards_train/2-w": 1.7434996366500854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.533955097198486, "rewards_train/margins_1": 1.7007629871368408, "rewards_train/margins_2": 2.1986547708511353, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -177.94674682617188, "logps_train/policy_1_l": -182.11306762695312, "logps_train/policy_1_w": -199.5718994140625, "logps_train/policy_2_2": -145.84156799316406, "logps_train/policy_2_w": -256.6827392578125, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -242.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 2.0709519386291504, "rewards_train/1-l": -0.940994381904602, "rewards_train/1-w": 4.27093505859375, "rewards_train/2-2": 3.2205305099487305, "rewards_train/2-w": 2.4223532676696777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.211929440498352, "rewards_train/margins_1": 2.1999831199645996, "rewards_train/margins_2": 0.7981772422790527, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -220.8430633544922, "logps_train/policy_1_l": -271.9556884765625, "logps_train/policy_1_w": -263.54449462890625, "logps_train/policy_2_2": -164.1692352294922, "logps_train/policy_2_w": -320.49090576171875, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -246.0, "logps_train/ref_1_w": -308.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -340.0, "rewards_train/1-2": 1.396944284439087, "rewards_train/1-l": -2.5799427032470703, "rewards_train/1-w": 4.467816352844238, "rewards_train/2-2": 4.0705766677856445, "rewards_train/2-w": 1.9274711608886719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.047759056091309, "rewards_train/margins_1": 3.0708720684051514, "rewards_train/margins_2": 2.1431055068969727, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -91.94009399414062, "logps_train/policy_1_l": -109.53160858154297, "logps_train/policy_1_w": -106.1251220703125, "logps_train/policy_2_2": -70.72370910644531, "logps_train/policy_2_w": -142.79757690429688, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 1.2419283390045166, "rewards_train/1-l": -2.0664420127868652, "rewards_train/1-w": 2.9902215003967285, "rewards_train/2-2": 2.024113178253174, "rewards_train/2-w": 1.4093055725097656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.056663513183594, "rewards_train/margins_1": 1.748293161392212, "rewards_train/margins_2": 0.6148076057434082, "step": 587 }, { "epoch": 1.76, "learning_rate": 2.022913997847417e-07, "loss": 0.4499, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -220.61053466796875, "logps_train/policy_1_l": -219.1721649169922, "logps_train/policy_1_w": -143.54937744140625, "logps_train/policy_2_2": -169.89108276367188, "logps_train/policy_2_w": -201.43453979492188, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.0923633575439453, "rewards_train/1-l": -2.967801809310913, "rewards_train/1-w": 3.415374994277954, "rewards_train/2-2": 2.9673361778259277, "rewards_train/2-w": 1.4002972841262817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.383176803588867, "rewards_train/margins_1": 2.323011636734009, "rewards_train/margins_2": 1.567038893699646, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -172.7235107421875, "logps_train/policy_1_l": -205.67245483398438, "logps_train/policy_1_w": -182.701171875, "logps_train/policy_2_2": -138.81387329101562, "logps_train/policy_2_w": -220.05140686035156, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 2.0157346725463867, "rewards_train/1-l": -2.054158926010132, "rewards_train/1-w": 3.395508050918579, "rewards_train/2-2": 3.1926355361938477, "rewards_train/2-w": 1.9729843139648438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.449666976928711, "rewards_train/margins_1": 1.3797733783721924, "rewards_train/margins_2": 1.219651222229004, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -215.241455078125, "logps_train/policy_1_l": -233.95486450195312, "logps_train/policy_1_w": -152.15737915039062, "logps_train/policy_2_2": -161.3770294189453, "logps_train/policy_2_w": -219.26724243164062, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": 2.0373783111572266, "rewards_train/1-l": -2.3034939765930176, "rewards_train/1-w": 4.286995887756348, "rewards_train/2-2": 3.771672010421753, "rewards_train/2-w": 1.9303069114685059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.590489864349365, "rewards_train/margins_1": 2.249617576599121, "rewards_train/margins_2": 1.841365098953247, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -117.92481994628906, "logps_train/policy_1_l": -130.75314331054688, "logps_train/policy_1_w": -70.37358093261719, "logps_train/policy_2_2": -98.41409301757812, "logps_train/policy_2_w": -94.48805236816406, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.524705171585083, "rewards_train/1-l": -1.6441619396209717, "rewards_train/1-w": 2.4188919067382812, "rewards_train/2-2": 2.3132781982421875, "rewards_train/2-w": 1.4551012516021729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.063053846359253, "rewards_train/margins_1": 0.8941867351531982, "rewards_train/margins_2": 0.8581769466400146, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -86.79632568359375, "logps_train/policy_1_l": -112.47444152832031, "logps_train/policy_1_w": -67.87194061279297, "logps_train/policy_2_2": -66.7402572631836, "logps_train/policy_2_w": -87.32313537597656, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.1703680753707886, "rewards_train/1-l": -1.6868972778320312, "rewards_train/1-w": 2.050697088241577, "rewards_train/2-2": 1.9431616067886353, "rewards_train/2-w": 1.3415144681930542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7375943660736084, "rewards_train/margins_1": 0.8803290128707886, "rewards_train/margins_2": 0.601647138595581, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -121.42062377929688, "logps_train/policy_1_l": -151.44317626953125, "logps_train/policy_1_w": -155.67919921875, "logps_train/policy_2_2": -96.43621826171875, "logps_train/policy_2_w": -194.08482360839844, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.828250765800476, "rewards_train/1-l": -2.7114477157592773, "rewards_train/1-w": 3.7281742095947266, "rewards_train/2-2": 2.6970033645629883, "rewards_train/2-w": 1.8993308544158936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.439621925354004, "rewards_train/margins_1": 1.8999234437942505, "rewards_train/margins_2": 0.7976725101470947, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -156.40194702148438, "logps_train/policy_1_l": -174.28866577148438, "logps_train/policy_1_w": -113.16930389404297, "logps_train/policy_2_2": -128.17843627929688, "logps_train/policy_2_w": -132.56800842285156, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.7660558223724365, "rewards_train/1-l": -2.23433518409729, "rewards_train/1-w": 3.561194658279419, "rewards_train/2-2": 2.6274683475494385, "rewards_train/2-w": 2.680699348449707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.795529842376709, "rewards_train/margins_1": 1.7951388359069824, "rewards_train/margins_2": -0.053231000900268555, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -115.22157287597656, "logps_train/policy_1_l": -141.84408569335938, "logps_train/policy_1_w": -111.349365234375, "logps_train/policy_2_2": -86.32151794433594, "logps_train/policy_2_w": -150.98020935058594, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3965930938720703, "rewards_train/1-l": -1.5297207832336426, "rewards_train/1-w": 3.2677979469299316, "rewards_train/2-2": 2.463160514831543, "rewards_train/2-w": 1.7394795417785645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.797518730163574, "rewards_train/margins_1": 1.8712048530578613, "rewards_train/margins_2": 0.7236809730529785, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -219.28494262695312, "logps_train/policy_1_l": -259.14105224609375, "logps_train/policy_1_w": -148.73143005371094, "logps_train/policy_2_2": -174.4539794921875, "logps_train/policy_2_w": -199.78897094726562, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.8933802843093872, "rewards_train/1-l": -2.999748468399048, "rewards_train/1-w": 4.461231708526611, "rewards_train/2-2": 4.179601669311523, "rewards_train/2-w": 2.072666645050049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.460980176925659, "rewards_train/margins_1": 2.567851424217224, "rewards_train/margins_2": 2.1069350242614746, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -115.02120971679688, "logps_train/policy_1_l": -152.98521423339844, "logps_train/policy_1_w": -138.59927368164062, "logps_train/policy_2_2": -85.2332763671875, "logps_train/policy_2_w": -186.9384765625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.6416288614273071, "rewards_train/1-l": -1.869223952293396, "rewards_train/1-w": 2.8713223934173584, "rewards_train/2-2": 2.523937702178955, "rewards_train/2-w": 0.9280266761779785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.740546345710754, "rewards_train/margins_1": 1.2296935319900513, "rewards_train/margins_2": 1.5959110260009766, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -134.60919189453125, "logps_train/policy_1_l": -160.11997985839844, "logps_train/policy_1_w": -104.55968475341797, "logps_train/policy_2_2": -89.65119934082031, "logps_train/policy_2_w": -151.84060668945312, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.575019359588623, "rewards_train/1-l": -2.433873414993286, "rewards_train/1-w": 3.069422721862793, "rewards_train/2-2": 3.0860514640808105, "rewards_train/2-w": 1.7698454856872559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.503296136856079, "rewards_train/margins_1": 1.49440336227417, "rewards_train/margins_2": 1.3162059783935547, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -132.28955078125, "logps_train/policy_1_l": -104.07637023925781, "logps_train/policy_1_w": -54.69579315185547, "logps_train/policy_2_2": -98.30439758300781, "logps_train/policy_2_w": -78.48208618164062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -75.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 1.095264196395874, "rewards_train/1-l": -2.1130077838897705, "rewards_train/1-w": 1.985694169998169, "rewards_train/2-2": 2.4445605278015137, "rewards_train/2-w": 0.9799166321754456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0987019538879395, "rewards_train/margins_1": 0.8904299736022949, "rewards_train/margins_2": 1.4646438956260681, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -180.6785888671875, "logps_train/policy_1_l": -80.77281188964844, "logps_train/policy_1_w": -60.58169937133789, "logps_train/policy_2_2": -124.62385559082031, "logps_train/policy_2_w": -93.5955810546875, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -107.5, "rewards_train/1-2": 1.4118295907974243, "rewards_train/1-l": -1.3682966232299805, "rewards_train/1-w": 2.245736598968506, "rewards_train/2-2": 3.3696460723876953, "rewards_train/2-w": 1.3927862644195557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6140332221984863, "rewards_train/margins_1": 0.8339070081710815, "rewards_train/margins_2": 1.9768598079681396, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -138.55526733398438, "logps_train/policy_1_l": -154.12017822265625, "logps_train/policy_1_w": -87.4572525024414, "logps_train/policy_2_2": -99.3520736694336, "logps_train/policy_2_w": -131.72430419921875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.3491612672805786, "rewards_train/1-l": -1.912407636642456, "rewards_train/1-w": 2.839040756225586, "rewards_train/2-2": 2.645261287689209, "rewards_train/2-w": 0.8617496490478516, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.751448392868042, "rewards_train/margins_1": 1.4898794889450073, "rewards_train/margins_2": 1.7835116386413574, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -205.1663818359375, "logps_train/policy_1_l": -246.93016052246094, "logps_train/policy_1_w": -173.345703125, "logps_train/policy_2_2": -162.42945861816406, "logps_train/policy_2_w": -220.3055419921875, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -201.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.3052358627319336, "rewards_train/1-l": -3.40493106842041, "rewards_train/1-w": 4.64511775970459, "rewards_train/2-2": 3.8414292335510254, "rewards_train/2-w": 2.6944470405578613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 8.050048828125, "rewards_train/margins_1": 2.3398818969726562, "rewards_train/margins_2": 1.146982192993164, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -138.729248046875, "logps_train/policy_1_l": -183.60650634765625, "logps_train/policy_1_w": -149.8804931640625, "logps_train/policy_2_2": -115.11196899414062, "logps_train/policy_2_w": -198.534912109375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.286449670791626, "rewards_train/1-l": -1.60068941116333, "rewards_train/1-w": 3.5955443382263184, "rewards_train/2-2": 2.9938812255859375, "rewards_train/2-w": 1.581665277481079, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.196233749389648, "rewards_train/margins_1": 1.3090946674346924, "rewards_train/margins_2": 1.4122159481048584, "step": 589 }, { "epoch": 1.77, "learning_rate": 1.9267109498579962e-07, "loss": 0.3962, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -108.32394409179688, "logps_train/policy_1_l": -131.21824645996094, "logps_train/policy_1_w": -128.13607788085938, "logps_train/policy_2_2": -89.71127319335938, "logps_train/policy_2_w": -159.4769744873047, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.926980972290039, "rewards_train/1-l": -2.2780745029449463, "rewards_train/1-w": 3.4301414489746094, "rewards_train/2-2": 2.53434157371521, "rewards_train/2-w": 1.7038644552230835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.708215951919556, "rewards_train/margins_1": 1.5031604766845703, "rewards_train/margins_2": 0.8304771184921265, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -153.80101013183594, "logps_train/policy_1_l": -126.61347961425781, "logps_train/policy_1_w": -73.94100952148438, "logps_train/policy_2_2": -115.01216125488281, "logps_train/policy_2_w": -97.5763931274414, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 1.7277119159698486, "rewards_train/1-l": -1.9927937984466553, "rewards_train/1-w": 2.766055107116699, "rewards_train/2-2": 3.240971326828003, "rewards_train/2-w": 1.6970479488372803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7588489055633545, "rewards_train/margins_1": 1.0383431911468506, "rewards_train/margins_2": 1.5439233779907227, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -158.49267578125, "logps_train/policy_1_l": -214.59146118164062, "logps_train/policy_1_w": -137.35202026367188, "logps_train/policy_2_2": -117.957763671875, "logps_train/policy_2_w": -183.00216674804688, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.621044635772705, "rewards_train/1-l": -3.445863723754883, "rewards_train/1-w": 3.8272976875305176, "rewards_train/2-2": 3.3245363235473633, "rewards_train/2-w": 2.1185336112976074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.2731614112854, "rewards_train/margins_1": 2.2062530517578125, "rewards_train/margins_2": 1.2060027122497559, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -163.1395721435547, "logps_train/policy_1_l": -188.72154235839844, "logps_train/policy_1_w": -118.48001861572266, "logps_train/policy_2_2": -134.84219360351562, "logps_train/policy_2_w": -144.14044189453125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.6448322534561157, "rewards_train/1-l": -2.6834819316864014, "rewards_train/1-w": 2.954733371734619, "rewards_train/2-2": 2.889216899871826, "rewards_train/2-w": 1.913299798965454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.6382153034210205, "rewards_train/margins_1": 1.3099011182785034, "rewards_train/margins_2": 0.9759171009063721, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -112.08763885498047, "logps_train/policy_1_l": -96.96827697753906, "logps_train/policy_1_w": -72.28671264648438, "logps_train/policy_2_2": -81.72319030761719, "logps_train/policy_2_w": -102.01577758789062, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.6564710140228271, "rewards_train/1-l": -2.0466315746307373, "rewards_train/1-w": 2.775235652923584, "rewards_train/2-2": 2.0811963081359863, "rewards_train/2-w": 1.7749847173690796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.821867227554321, "rewards_train/margins_1": 2.118764638900757, "rewards_train/margins_2": 0.30621159076690674, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -247.75241088867188, "logps_train/policy_1_l": -206.87667846679688, "logps_train/policy_1_w": -186.76878356933594, "logps_train/policy_2_2": -197.51345825195312, "logps_train/policy_2_w": -230.9581298828125, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.2138216495513916, "rewards_train/1-l": -2.9450910091400146, "rewards_train/1-w": 4.359059810638428, "rewards_train/2-2": 4.911933898925781, "rewards_train/2-w": 2.561218023300171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.304150819778442, "rewards_train/margins_1": 2.145238161087036, "rewards_train/margins_2": 2.3507158756256104, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -164.03565979003906, "logps_train/policy_1_l": -195.14108276367188, "logps_train/policy_1_w": -177.1448974609375, "logps_train/policy_2_2": -124.440185546875, "logps_train/policy_2_w": -231.2654571533203, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -250.0, "rewards_train/1-2": 1.8753407001495361, "rewards_train/1-l": -1.5377426147460938, "rewards_train/1-w": 3.50894832611084, "rewards_train/2-2": 3.0981688499450684, "rewards_train/2-w": 1.8757991790771484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.046690940856934, "rewards_train/margins_1": 1.6336076259613037, "rewards_train/margins_2": 1.22236967086792, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -117.62875366210938, "logps_train/policy_1_l": -165.0584716796875, "logps_train/policy_1_w": -99.30108642578125, "logps_train/policy_2_2": -96.16394805908203, "logps_train/policy_2_w": -130.36392211914062, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.5089998245239258, "rewards_train/1-l": -2.768348217010498, "rewards_train/1-w": 2.9652035236358643, "rewards_train/2-2": 2.5765738487243652, "rewards_train/2-w": 1.8261083364486694, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.733551740646362, "rewards_train/margins_1": 1.4562036991119385, "rewards_train/margins_2": 0.7504655122756958, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -186.65267944335938, "logps_train/policy_1_l": -253.48403930664062, "logps_train/policy_1_w": -152.5191650390625, "logps_train/policy_2_2": -143.96817016601562, "logps_train/policy_2_w": -200.90866088867188, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.3153965473175049, "rewards_train/1-l": -2.207778215408325, "rewards_train/1-w": 3.8918323516845703, "rewards_train/2-2": 2.748887538909912, "rewards_train/2-w": 2.0841336250305176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.0996105670928955, "rewards_train/margins_1": 2.5764358043670654, "rewards_train/margins_2": 0.6647539138793945, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -110.08638763427734, "logps_train/policy_1_l": -101.45410919189453, "logps_train/policy_1_w": -79.19068908691406, "logps_train/policy_2_2": -91.5036392211914, "logps_train/policy_2_w": -105.84646606445312, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.9944863319396973, "rewards_train/1-l": -1.0245124101638794, "rewards_train/1-w": 2.9227283000946045, "rewards_train/2-2": 2.760573625564575, "rewards_train/2-w": 1.7411346435546875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.947240710258484, "rewards_train/margins_1": 0.9282419681549072, "rewards_train/margins_2": 1.0194389820098877, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -153.24868774414062, "logps_train/policy_1_l": -198.4421844482422, "logps_train/policy_1_w": -119.80843353271484, "logps_train/policy_2_2": -120.3758316040039, "logps_train/policy_2_w": -159.23629760742188, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 1.4884129762649536, "rewards_train/1-l": -2.7789840698242188, "rewards_train/1-w": 3.551969051361084, "rewards_train/2-2": 3.0506975650787354, "rewards_train/2-w": 1.9810576438903809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.330953121185303, "rewards_train/margins_1": 2.0635560750961304, "rewards_train/margins_2": 1.0696399211883545, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -154.37957763671875, "logps_train/policy_1_l": -156.08798217773438, "logps_train/policy_1_w": -132.89035034179688, "logps_train/policy_2_2": -121.94912719726562, "logps_train/policy_2_w": -152.09840393066406, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.374541997909546, "rewards_train/1-l": -2.015439033508301, "rewards_train/1-w": 2.4484636783599854, "rewards_train/2-2": 2.794931173324585, "rewards_train/2-w": 1.487034559249878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.463902711868286, "rewards_train/margins_1": 1.0739216804504395, "rewards_train/margins_2": 1.307896614074707, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -132.33096313476562, "logps_train/policy_1_l": -186.5146484375, "logps_train/policy_1_w": -68.99449157714844, "logps_train/policy_2_2": -110.31767272949219, "logps_train/policy_2_w": -86.19943237304688, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": 1.559481143951416, "rewards_train/1-l": -2.355858564376831, "rewards_train/1-w": 2.698206901550293, "rewards_train/2-2": 2.5764362812042236, "rewards_train/2-w": 2.2331817150115967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.054065465927124, "rewards_train/margins_1": 1.138725757598877, "rewards_train/margins_2": 0.34325456619262695, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -137.0701904296875, "logps_train/policy_1_l": -160.1976776123047, "logps_train/policy_1_w": -109.39569091796875, "logps_train/policy_2_2": -98.7689208984375, "logps_train/policy_2_w": -154.45407104492188, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 0.9875121116638184, "rewards_train/1-l": -2.1295323371887207, "rewards_train/1-w": 2.972930908203125, "rewards_train/2-2": 2.6750612258911133, "rewards_train/2-w": 1.1202173233032227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.102463245391846, "rewards_train/margins_1": 1.9854187965393066, "rewards_train/margins_2": 1.5548439025878906, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -210.71475219726562, "logps_train/policy_1_l": -195.35760498046875, "logps_train/policy_1_w": -128.4173583984375, "logps_train/policy_2_2": -161.64349365234375, "logps_train/policy_2_w": -180.3459014892578, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.2418056726455688, "rewards_train/1-l": -2.290447235107422, "rewards_train/1-w": 4.027013778686523, "rewards_train/2-2": 3.2940480709075928, "rewards_train/2-w": 2.259160041809082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.317461013793945, "rewards_train/margins_1": 2.7852081060409546, "rewards_train/margins_2": 1.0348880290985107, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -118.68981170654297, "logps_train/policy_1_l": -186.91213989257812, "logps_train/policy_1_w": -149.08758544921875, "logps_train/policy_2_2": -78.35589599609375, "logps_train/policy_2_w": -206.574951171875, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.8614870309829712, "rewards_train/1-l": -2.8895039558410645, "rewards_train/1-w": 2.6818671226501465, "rewards_train/2-2": 2.6177310943603516, "rewards_train/2-w": 0.4061770737171173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.571371078491211, "rewards_train/margins_1": 1.8203800916671753, "rewards_train/margins_2": 2.2115540206432343, "step": 591 }, { "epoch": 1.77, "learning_rate": 1.832759814204166e-07, "loss": 0.3546, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -213.6827392578125, "logps_train/policy_1_l": -188.7318115234375, "logps_train/policy_1_w": -143.79843139648438, "logps_train/policy_2_2": -158.9698486328125, "logps_train/policy_2_w": -193.4640350341797, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.6817255020141602, "rewards_train/1-l": -1.766931414604187, "rewards_train/1-w": 3.788905143737793, "rewards_train/2-2": 3.565514087677002, "rewards_train/2-w": 1.9223467111587524, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.55583655834198, "rewards_train/margins_1": 2.107179641723633, "rewards_train/margins_2": 1.6431673765182495, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -181.814697265625, "logps_train/policy_1_l": -147.37881469726562, "logps_train/policy_1_w": -105.73860168457031, "logps_train/policy_2_2": -128.03140258789062, "logps_train/policy_2_w": -149.927734375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.0357179641723633, "rewards_train/1-l": -2.7894446849823, "rewards_train/1-w": 3.250359535217285, "rewards_train/2-2": 3.4085781574249268, "rewards_train/2-w": 1.2361342906951904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.039804220199585, "rewards_train/margins_1": 2.214641571044922, "rewards_train/margins_2": 2.1724438667297363, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -112.73524475097656, "logps_train/policy_1_l": -222.34814453125, "logps_train/policy_1_w": -136.11257934570312, "logps_train/policy_2_2": -90.51702880859375, "logps_train/policy_2_w": -199.52259826660156, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.8921005725860596, "rewards_train/1-l": -3.0105955600738525, "rewards_train/1-w": 3.7246789932250977, "rewards_train/2-2": 2.7357966899871826, "rewards_train/2-w": 0.9524276852607727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.73527455329895, "rewards_train/margins_1": 1.832578420639038, "rewards_train/margins_2": 1.78336900472641, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -192.22665405273438, "logps_train/policy_1_l": -105.83264923095703, "logps_train/policy_1_w": -132.62937927246094, "logps_train/policy_2_2": -133.10336303710938, "logps_train/policy_2_w": -167.6026611328125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 0.9117093682289124, "rewards_train/1-l": -1.7223272323608398, "rewards_train/1-w": 3.442530632019043, "rewards_train/2-2": 3.6009926795959473, "rewards_train/2-w": 1.7491087913513184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.164857864379883, "rewards_train/margins_1": 2.5308212637901306, "rewards_train/margins_2": 1.851883888244629, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -205.9803009033203, "logps_train/policy_1_l": -148.91015625, "logps_train/policy_1_w": -150.93423461914062, "logps_train/policy_2_2": -150.9139404296875, "logps_train/policy_2_w": -209.13552856445312, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 0.8230630159378052, "rewards_train/1-l": -1.6413077116012573, "rewards_train/1-w": 3.733139991760254, "rewards_train/2-2": 3.529700517654419, "rewards_train/2-w": 1.000510334968567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.374447703361511, "rewards_train/margins_1": 2.9100769758224487, "rewards_train/margins_2": 2.529190182685852, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -165.23159790039062, "logps_train/policy_1_l": -101.2832260131836, "logps_train/policy_1_w": -48.43943786621094, "logps_train/policy_2_2": -122.35933685302734, "logps_train/policy_2_w": -87.59375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -73.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": -0.13097038865089417, "rewards_train/1-l": -1.595900535583496, "rewards_train/1-w": 2.4408223628997803, "rewards_train/2-2": 2.141019344329834, "rewards_train/2-w": 0.7195314168930054, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.036722898483276, "rewards_train/margins_1": 2.5717927515506744, "rewards_train/margins_2": 1.4214879274368286, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -94.2452621459961, "logps_train/policy_1_l": -138.60813903808594, "logps_train/policy_1_w": -80.89913177490234, "logps_train/policy_2_2": -69.69746398925781, "logps_train/policy_2_w": -117.39936065673828, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.6594582796096802, "rewards_train/1-l": -2.2822980880737305, "rewards_train/1-w": 3.075711727142334, "rewards_train/2-2": 2.5392374992370605, "rewards_train/2-w": 1.4006894826889038, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.3580098152160645, "rewards_train/margins_1": 1.4162534475326538, "rewards_train/margins_2": 1.1385480165481567, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -84.1410903930664, "logps_train/policy_1_l": -135.71002197265625, "logps_train/policy_1_w": -85.01478576660156, "logps_train/policy_2_2": -67.93453979492188, "logps_train/policy_2_w": -108.18951416015625, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.2546405792236328, "rewards_train/1-l": -2.163970947265625, "rewards_train/1-w": 2.2633652687072754, "rewards_train/2-2": 1.7432646751403809, "rewards_train/2-w": 1.3603460788726807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4273362159729, "rewards_train/margins_1": 1.0087246894836426, "rewards_train/margins_2": 0.3829185962677002, "step": 592 }, { "epoch": 1.78, "logps_train/policy_1_2": -99.23760986328125, "logps_train/policy_1_l": -113.69783020019531, "logps_train/policy_1_w": -94.2023696899414, "logps_train/policy_2_2": -82.95355224609375, "logps_train/policy_2_w": -119.85995483398438, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.5766303539276123, "rewards_train/1-l": -1.7199782133102417, "rewards_train/1-w": 3.3344507217407227, "rewards_train/2-2": 2.2487852573394775, "rewards_train/2-w": 2.076504707336426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.054428935050964, "rewards_train/margins_1": 1.7578203678131104, "rewards_train/margins_2": 0.17228055000305176, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -204.07479858398438, "logps_train/policy_1_l": -169.4805908203125, "logps_train/policy_1_w": -116.73119354248047, "logps_train/policy_2_2": -158.9907684326172, "logps_train/policy_2_w": -140.38584899902344, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 1.6112685203552246, "rewards_train/1-l": -2.1769652366638184, "rewards_train/1-w": 3.0850839614868164, "rewards_train/2-2": 3.9446725845336914, "rewards_train/2-w": 2.0200085639953613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.262049198150635, "rewards_train/margins_1": 1.4738154411315918, "rewards_train/margins_2": 1.92466402053833, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -222.98110961914062, "logps_train/policy_1_l": -246.93218994140625, "logps_train/policy_1_w": -166.69650268554688, "logps_train/policy_2_2": -182.61624145507812, "logps_train/policy_2_w": -221.66012573242188, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": 1.772982120513916, "rewards_train/1-l": -3.4760310649871826, "rewards_train/1-w": 3.8991007804870605, "rewards_train/2-2": 3.503220558166504, "rewards_train/2-w": 1.9277379512786865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.375131845474243, "rewards_train/margins_1": 2.1261186599731445, "rewards_train/margins_2": 1.5754826068878174, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -229.70437622070312, "logps_train/policy_1_l": -212.26315307617188, "logps_train/policy_1_w": -152.16078186035156, "logps_train/policy_2_2": -183.5501708984375, "logps_train/policy_2_w": -189.55606079101562, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 2.773312568664551, "rewards_train/1-l": -2.776315689086914, "rewards_train/1-w": 4.3776726722717285, "rewards_train/2-2": 4.854357719421387, "rewards_train/2-w": 2.9256439208984375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.153988361358643, "rewards_train/margins_1": 1.6043601036071777, "rewards_train/margins_2": 1.9287137985229492, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -253.17388916015625, "logps_train/policy_1_l": -223.99888610839844, "logps_train/policy_1_w": -148.45054626464844, "logps_train/policy_2_2": -200.69583129882812, "logps_train/policy_2_w": -198.69874572753906, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -239.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.7076120376586914, "rewards_train/1-l": -2.1842637062072754, "rewards_train/1-w": 3.901820421218872, "rewards_train/2-2": 3.8554158210754395, "rewards_train/2-w": 1.8863755464553833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.0860841274261475, "rewards_train/margins_1": 2.1942083835601807, "rewards_train/margins_2": 1.9690402746200562, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -120.03900909423828, "logps_train/policy_1_l": -97.46749877929688, "logps_train/policy_1_w": -86.46336364746094, "logps_train/policy_2_2": -95.83622741699219, "logps_train/policy_2_w": -115.31242370605469, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 1.8937561511993408, "rewards_train/1-l": -1.7316131591796875, "rewards_train/1-w": 2.416945457458496, "rewards_train/2-2": 2.9851274490356445, "rewards_train/2-w": 1.1461007595062256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.148558616638184, "rewards_train/margins_1": 0.5231893062591553, "rewards_train/margins_2": 1.839026689529419, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -140.29702758789062, "logps_train/policy_1_l": -142.94810485839844, "logps_train/policy_1_w": -129.94894409179688, "logps_train/policy_2_2": -111.16630554199219, "logps_train/policy_2_w": -166.92218017578125, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.1952967643737793, "rewards_train/1-l": -2.3494975566864014, "rewards_train/1-w": 2.7761993408203125, "rewards_train/2-2": 2.7021193504333496, "rewards_train/2-w": 0.8429379463195801, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.125696897506714, "rewards_train/margins_1": 1.5809025764465332, "rewards_train/margins_2": 1.8591814041137695, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -215.75082397460938, "logps_train/policy_1_l": -180.0021209716797, "logps_train/policy_1_w": -124.86575317382812, "logps_train/policy_2_2": -150.88882446289062, "logps_train/policy_2_w": -176.75242614746094, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.5999176502227783, "rewards_train/1-l": -2.062713146209717, "rewards_train/1-w": 3.483736991882324, "rewards_train/2-2": 4.164240837097168, "rewards_train/2-w": 1.570069670677185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.546450138092041, "rewards_train/margins_1": 1.883819341659546, "rewards_train/margins_2": 2.594171166419983, "step": 593 }, { "epoch": 1.78, "learning_rate": 1.7410697603511383e-07, "loss": 0.3078, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -250.9656982421875, "logps_train/policy_1_l": -236.02728271484375, "logps_train/policy_1_w": -132.93667602539062, "logps_train/policy_2_2": -196.51914978027344, "logps_train/policy_2_w": -179.07525634765625, "logps_train/ref_1_2": -266.0, "logps_train/ref_1_l": -216.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.465930700302124, "rewards_train/1-l": -2.1027276515960693, "rewards_train/1-w": 3.465707302093506, "rewards_train/2-2": 3.9605846405029297, "rewards_train/2-w": 1.8987239599227905, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.568434953689575, "rewards_train/margins_1": 1.9997766017913818, "rewards_train/margins_2": 2.061860680580139, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -144.97702026367188, "logps_train/policy_1_l": -200.135009765625, "logps_train/policy_1_w": -105.61270141601562, "logps_train/policy_2_2": -112.67410278320312, "logps_train/policy_2_w": -147.46517944335938, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.6507349014282227, "rewards_train/1-l": -2.0678958892822266, "rewards_train/1-w": 3.3753514289855957, "rewards_train/2-2": 2.802023410797119, "rewards_train/2-w": 1.9952800273895264, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.443247318267822, "rewards_train/margins_1": 1.724616527557373, "rewards_train/margins_2": 0.8067433834075928, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -187.6836395263672, "logps_train/policy_1_l": -252.9624481201172, "logps_train/policy_1_w": -144.8328857421875, "logps_train/policy_2_2": -144.12652587890625, "logps_train/policy_2_w": -186.35842895507812, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 2.186323404312134, "rewards_train/1-l": -3.154057264328003, "rewards_train/1-w": 4.06475830078125, "rewards_train/2-2": 3.8881278038024902, "rewards_train/2-w": 2.1637654304504395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.218815565109253, "rewards_train/margins_1": 1.8784348964691162, "rewards_train/margins_2": 1.7243623733520508, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -122.9278564453125, "logps_train/policy_1_l": -152.45687866210938, "logps_train/policy_1_w": -139.36553955078125, "logps_train/policy_2_2": -85.01422119140625, "logps_train/policy_2_w": -189.7464599609375, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": 1.3822143077850342, "rewards_train/1-l": -1.3914873600006104, "rewards_train/1-w": 3.0431337356567383, "rewards_train/2-2": 2.9110770225524902, "rewards_train/2-w": 0.8659794330596924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.434621095657349, "rewards_train/margins_1": 1.660919427871704, "rewards_train/margins_2": 2.045097589492798, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -175.21861267089844, "logps_train/policy_1_l": -180.87863159179688, "logps_train/policy_1_w": -129.15255737304688, "logps_train/policy_2_2": -139.1513214111328, "logps_train/policy_2_w": -190.20509338378906, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.5377094745635986, "rewards_train/1-l": -1.2292680740356445, "rewards_train/1-w": 3.52497935295105, "rewards_train/2-2": 2.937211036682129, "rewards_train/2-w": 1.5316390991210938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.754247426986694, "rewards_train/margins_1": 1.9872698783874512, "rewards_train/margins_2": 1.4055719375610352, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -150.89846801757812, "logps_train/policy_1_l": -216.71963500976562, "logps_train/policy_1_w": -132.0643768310547, "logps_train/policy_2_2": -122.50862121582031, "logps_train/policy_2_w": -161.7279510498047, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9945286512374878, "rewards_train/1-l": -3.405947685241699, "rewards_train/1-w": 3.4544997215270996, "rewards_train/2-2": 2.9999194145202637, "rewards_train/2-w": 1.9916585683822632, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.860447406768799, "rewards_train/margins_1": 1.4599710702896118, "rewards_train/margins_2": 1.0082608461380005, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -122.515869140625, "logps_train/policy_1_l": -72.69757080078125, "logps_train/policy_1_w": -60.91097640991211, "logps_train/policy_2_2": -102.98988342285156, "logps_train/policy_2_w": -78.77786254882812, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": 1.406225562095642, "rewards_train/1-l": -1.8379206657409668, "rewards_train/1-w": 2.1694495677948, "rewards_train/2-2": 2.688511848449707, "rewards_train/2-w": 1.5409644842147827, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.007370233535767, "rewards_train/margins_1": 0.7632240056991577, "rewards_train/margins_2": 1.1475473642349243, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -164.5299835205078, "logps_train/policy_1_l": -164.37298583984375, "logps_train/policy_1_w": -97.27101135253906, "logps_train/policy_2_2": -124.65768432617188, "logps_train/policy_2_w": -126.45208740234375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.8907521367073059, "rewards_train/1-l": -1.411517858505249, "rewards_train/1-w": 2.7791483402252197, "rewards_train/2-2": 2.5092310905456543, "rewards_train/2-w": 1.406354546546936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.190666198730469, "rewards_train/margins_1": 1.8883962035179138, "rewards_train/margins_2": 1.1028765439987183, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -113.81694793701172, "logps_train/policy_1_l": -133.4249267578125, "logps_train/policy_1_w": -72.93944549560547, "logps_train/policy_2_2": -76.6144027709961, "logps_train/policy_2_w": -109.91459655761719, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -122.5, "rewards_train/1-2": 1.0518991947174072, "rewards_train/1-l": -2.0663208961486816, "rewards_train/1-w": 2.278320789337158, "rewards_train/2-2": 2.4026217460632324, "rewards_train/2-w": 1.2374470233917236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.34464168548584, "rewards_train/margins_1": 1.226421594619751, "rewards_train/margins_2": 1.1651747226715088, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -119.57722473144531, "logps_train/policy_1_l": -132.4071044921875, "logps_train/policy_1_w": -117.51036071777344, "logps_train/policy_2_2": -93.6419906616211, "logps_train/policy_2_w": -148.47857666015625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.9016525745391846, "rewards_train/1-l": -2.026648998260498, "rewards_train/1-w": 3.3067760467529297, "rewards_train/2-2": 3.0826759338378906, "rewards_train/2-w": 1.9841731786727905, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.333425045013428, "rewards_train/margins_1": 1.4051234722137451, "rewards_train/margins_2": 1.0985027551651, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -142.19778442382812, "logps_train/policy_1_l": -135.88436889648438, "logps_train/policy_1_w": -105.50057220458984, "logps_train/policy_2_2": -114.23877716064453, "logps_train/policy_2_w": -133.9908905029297, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.2520960569381714, "rewards_train/1-l": -1.760312795639038, "rewards_train/1-w": 3.1843180656433105, "rewards_train/2-2": 2.318699598312378, "rewards_train/2-w": 1.8610676527023315, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.944630861282349, "rewards_train/margins_1": 1.9322220087051392, "rewards_train/margins_2": 0.4576319456100464, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -86.28953552246094, "logps_train/policy_1_l": -55.040977478027344, "logps_train/policy_1_w": -52.6479606628418, "logps_train/policy_2_2": -57.9570198059082, "logps_train/policy_2_w": -73.72064971923828, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -45.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -78.0, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 0.9665544033050537, "rewards_train/1-l": -0.9394491314888, "rewards_train/1-w": 2.1221179962158203, "rewards_train/2-2": 1.9886730909347534, "rewards_train/2-w": 1.1209040880203247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0615671277046204, "rewards_train/margins_1": 1.1555635929107666, "rewards_train/margins_2": 0.8677690029144287, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -118.13935089111328, "logps_train/policy_1_l": -152.91714477539062, "logps_train/policy_1_w": -117.70055389404297, "logps_train/policy_2_2": -89.05111694335938, "logps_train/policy_2_w": -159.97845458984375, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.4727840423583984, "rewards_train/1-l": -2.3983559608459473, "rewards_train/1-w": 3.3721323013305664, "rewards_train/2-2": 2.460513114929199, "rewards_train/2-w": 1.3834037780761719, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.770488262176514, "rewards_train/margins_1": 1.899348258972168, "rewards_train/margins_2": 1.0771093368530273, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -257.79669189453125, "logps_train/policy_1_l": -193.79112243652344, "logps_train/policy_1_w": -125.87136840820312, "logps_train/policy_2_2": -191.1741943359375, "logps_train/policy_2_w": -173.10743713378906, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.1535351276397705, "rewards_train/1-l": -3.1752054691314697, "rewards_train/1-w": 3.3351287841796875, "rewards_train/2-2": 4.003087997436523, "rewards_train/2-w": 1.5292949676513672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.510334253311157, "rewards_train/margins_1": 2.181593656539917, "rewards_train/margins_2": 2.4737930297851562, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -75.24640655517578, "logps_train/policy_1_l": -104.13372802734375, "logps_train/policy_1_w": -65.8221664428711, "logps_train/policy_2_2": -55.145843505859375, "logps_train/policy_2_w": -90.21292114257812, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.6222342252731323, "rewards_train/1-l": -2.093963623046875, "rewards_train/1-w": 2.738877058029175, "rewards_train/2-2": 2.232290744781494, "rewards_train/2-w": 1.6646454334259033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.83284068107605, "rewards_train/margins_1": 1.1166428327560425, "rewards_train/margins_2": 0.5676453113555908, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -233.10704040527344, "logps_train/policy_1_l": -321.7334289550781, "logps_train/policy_1_w": -173.38119506835938, "logps_train/policy_2_2": -181.27728271484375, "logps_train/policy_2_w": -223.21368408203125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.2189836502075195, "rewards_train/1-l": -4.714356899261475, "rewards_train/1-w": 3.594693899154663, "rewards_train/2-2": 3.3285224437713623, "rewards_train/2-w": 1.8255064487457275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.309050798416138, "rewards_train/margins_1": 2.3757102489471436, "rewards_train/margins_2": 1.5030159950256348, "step": 595 }, { "epoch": 1.78, "learning_rate": 1.651649737086533e-07, "loss": 0.3759, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -144.38037109375, "logps_train/policy_1_l": -135.80535888671875, "logps_train/policy_1_w": -111.70701599121094, "logps_train/policy_2_2": -96.61224365234375, "logps_train/policy_2_w": -164.6909637451172, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 1.1729016304016113, "rewards_train/1-l": -2.2602245807647705, "rewards_train/1-w": 3.029689311981201, "rewards_train/2-2": 2.849713087081909, "rewards_train/2-w": 0.6383249759674072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.289913892745972, "rewards_train/margins_1": 1.8567876815795898, "rewards_train/margins_2": 2.211388111114502, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -69.62744903564453, "logps_train/policy_1_l": -120.00291442871094, "logps_train/policy_1_w": -57.57796859741211, "logps_train/policy_2_2": -51.843997955322266, "logps_train/policy_2_w": -79.84396362304688, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -65.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.5130365490913391, "rewards_train/1-l": -1.863232135772705, "rewards_train/1-w": 1.6562656164169312, "rewards_train/2-2": 1.351928472518921, "rewards_train/2-w": 0.8499783277511597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5194977521896362, "rewards_train/margins_1": 1.143229067325592, "rewards_train/margins_2": 0.5019501447677612, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -126.07135009765625, "logps_train/policy_1_l": -179.3765869140625, "logps_train/policy_1_w": -109.00137329101562, "logps_train/policy_2_2": -92.2420883178711, "logps_train/policy_2_w": -148.7437744140625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.6506773233413696, "rewards_train/1-l": -2.4849257469177246, "rewards_train/1-w": 3.0428309440612793, "rewards_train/2-2": 2.6660256385803223, "rewards_train/2-w": 1.274841547012329, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.527756690979004, "rewards_train/margins_1": 1.3921536207199097, "rewards_train/margins_2": 1.3911840915679932, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -141.10116577148438, "logps_train/policy_1_l": -157.90769958496094, "logps_train/policy_1_w": -69.48336029052734, "logps_train/policy_2_2": -108.27947235107422, "logps_train/policy_2_w": -102.32292938232422, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.5883209705352783, "rewards_train/1-l": -2.9493632316589355, "rewards_train/1-w": 2.8051791191101074, "rewards_train/2-2": 3.1033029556274414, "rewards_train/2-w": 1.5442695617675781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.754542350769043, "rewards_train/margins_1": 1.216858148574829, "rewards_train/margins_2": 1.5590333938598633, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -79.04907989501953, "logps_train/policy_1_l": -42.049171447753906, "logps_train/policy_1_w": -60.633148193359375, "logps_train/policy_2_2": -54.414222717285156, "logps_train/policy_2_w": -77.03435516357422, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -34.0, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -87.0, "rewards_train/1-2": 0.964623212814331, "rewards_train/1-l": -0.8017920255661011, "rewards_train/1-w": 2.1645171642303467, "rewards_train/2-2": 2.2093586921691895, "rewards_train/2-w": 0.9899238348007202, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9663091897964478, "rewards_train/margins_1": 1.1998939514160156, "rewards_train/margins_2": 1.2194348573684692, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -78.25093841552734, "logps_train/policy_1_l": -77.6805648803711, "logps_train/policy_1_w": -46.202003479003906, "logps_train/policy_2_2": -56.36046600341797, "logps_train/policy_2_w": -67.75541687011719, "logps_train/ref_1_2": -88.5, "logps_train/ref_1_l": -62.75, "logps_train/ref_1_w": -65.5, "logps_train/ref_2_2": -75.5, "logps_train/ref_2_w": -76.0, "rewards_train/1-2": 1.020804762840271, "rewards_train/1-l": -1.4875876903533936, "rewards_train/1-w": 1.902065634727478, "rewards_train/2-2": 1.897937536239624, "rewards_train/2-w": 0.8389114737510681, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3896533250808716, "rewards_train/margins_1": 0.881260871887207, "rewards_train/margins_2": 1.059026062488556, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -134.86935424804688, "logps_train/policy_1_l": -141.28399658203125, "logps_train/policy_1_w": -98.41130065917969, "logps_train/policy_2_2": -106.11914825439453, "logps_train/policy_2_w": -123.87727355957031, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 2.452127456665039, "rewards_train/1-l": -2.15437650680542, "rewards_train/1-w": 3.1623854637145996, "rewards_train/2-2": 3.56386661529541, "rewards_train/2-w": 2.115494728088379, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.3167619705200195, "rewards_train/margins_1": 0.7102580070495605, "rewards_train/margins_2": 1.4483718872070312, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -156.876708984375, "logps_train/policy_1_l": -167.34054565429688, "logps_train/policy_1_w": -90.66986846923828, "logps_train/policy_2_2": -121.29733276367188, "logps_train/policy_2_w": -122.44839477539062, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 1.2279541492462158, "rewards_train/1-l": -1.9903039932250977, "rewards_train/1-w": 2.3642632961273193, "rewards_train/2-2": 2.6718287467956543, "rewards_train/2-w": 1.4871914386749268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.354567289352417, "rewards_train/margins_1": 1.1363091468811035, "rewards_train/margins_2": 1.1846373081207275, "step": 596 }, { "epoch": 1.79, "logps_train/policy_1_2": -158.7667694091797, "logps_train/policy_1_l": -189.10934448242188, "logps_train/policy_1_w": -167.261962890625, "logps_train/policy_2_2": -125.0671615600586, "logps_train/policy_2_w": -230.0907745361328, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -247.0, "rewards_train/1-2": 2.501448154449463, "rewards_train/1-l": -2.155856132507324, "rewards_train/1-w": 4.076928615570068, "rewards_train/2-2": 3.540159225463867, "rewards_train/2-w": 1.7346723079681396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.232784748077393, "rewards_train/margins_1": 1.5754804611206055, "rewards_train/margins_2": 1.8054869174957275, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -89.29444885253906, "logps_train/policy_1_l": -110.83389282226562, "logps_train/policy_1_w": -93.31558227539062, "logps_train/policy_2_2": -66.0500717163086, "logps_train/policy_2_w": -122.9792251586914, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.2461661100387573, "rewards_train/1-l": -1.6656893491744995, "rewards_train/1-w": 2.968441963195801, "rewards_train/2-2": 2.2287819385528564, "rewards_train/2-w": 1.698952555656433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6341313123703, "rewards_train/margins_1": 1.7222758531570435, "rewards_train/margins_2": 0.5298293828964233, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -138.3147430419922, "logps_train/policy_1_l": -137.6322021484375, "logps_train/policy_1_w": -162.4065399169922, "logps_train/policy_2_2": -94.77700805664062, "logps_train/policy_2_w": -215.3408660888672, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.2400097846984863, "rewards_train/1-l": -1.1608766317367554, "rewards_train/1-w": 2.3249707221984863, "rewards_train/2-2": 2.0851895809173584, "rewards_train/2-w": 0.2190389633178711, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4858473539352417, "rewards_train/margins_1": 1.0849609375, "rewards_train/margins_2": 1.8661506175994873, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -128.66140747070312, "logps_train/policy_1_l": -136.1813507080078, "logps_train/policy_1_w": -78.93677520751953, "logps_train/policy_2_2": -93.6037368774414, "logps_train/policy_2_w": -108.43626403808594, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.5026103258132935, "rewards_train/1-l": -2.308272361755371, "rewards_train/1-w": 2.7110097408294678, "rewards_train/2-2": 2.6427507400512695, "rewards_train/2-w": 1.34856116771698, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.019282102584839, "rewards_train/margins_1": 1.2083994150161743, "rewards_train/margins_2": 1.2941895723342896, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -158.75396728515625, "logps_train/policy_1_l": -168.65420532226562, "logps_train/policy_1_w": -128.1260223388672, "logps_train/policy_2_2": -121.50597381591797, "logps_train/policy_2_w": -190.15675354003906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.5136659145355225, "rewards_train/1-l": -1.0308501720428467, "rewards_train/1-w": 3.545210599899292, "rewards_train/2-2": 2.9525275230407715, "rewards_train/2-w": 1.3374497890472412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.576060771942139, "rewards_train/margins_1": 2.0315446853637695, "rewards_train/margins_2": 1.6150777339935303, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -103.79052734375, "logps_train/policy_1_l": -138.69007873535156, "logps_train/policy_1_w": -79.376953125, "logps_train/policy_2_2": -89.37584686279297, "logps_train/policy_2_w": -103.45589447021484, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.170165777206421, "rewards_train/1-l": -1.663539171218872, "rewards_train/1-w": 2.61855411529541, "rewards_train/2-2": 2.03751277923584, "rewards_train/2-w": 1.8067541122436523, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.282093286514282, "rewards_train/margins_1": 1.4483883380889893, "rewards_train/margins_2": 0.2307586669921875, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -193.0796356201172, "logps_train/policy_1_l": -197.77589416503906, "logps_train/policy_1_w": -124.28530883789062, "logps_train/policy_2_2": -135.60440063476562, "logps_train/policy_2_w": -191.1512451171875, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.368598461151123, "rewards_train/1-l": -2.889308214187622, "rewards_train/1-w": 3.687094211578369, "rewards_train/2-2": 3.174715995788574, "rewards_train/2-w": 1.2067508697509766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.576402425765991, "rewards_train/margins_1": 2.318495750427246, "rewards_train/margins_2": 1.9679651260375977, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -192.43580627441406, "logps_train/policy_1_l": -195.12222290039062, "logps_train/policy_1_w": -133.60479736328125, "logps_train/policy_2_2": -148.1158447265625, "logps_train/policy_2_w": -191.860107421875, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 2.468919515609741, "rewards_train/1-l": -2.0231592655181885, "rewards_train/1-w": 4.570771217346191, "rewards_train/2-2": 4.67279052734375, "rewards_train/2-w": 2.18117618560791, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.59393048286438, "rewards_train/margins_1": 2.10185170173645, "rewards_train/margins_2": 2.49161434173584, "step": 597 }, { "epoch": 1.79, "learning_rate": 1.5645084716469778e-07, "loss": 0.4096, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -80.84967803955078, "logps_train/policy_1_l": -92.52940368652344, "logps_train/policy_1_w": -83.98861694335938, "logps_train/policy_2_2": -63.493499755859375, "logps_train/policy_2_w": -110.0687026977539, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -77.5, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.1119067668914795, "rewards_train/1-l": -1.5230576992034912, "rewards_train/1-w": 2.5464510917663574, "rewards_train/2-2": 1.894790768623352, "rewards_train/2-w": 1.4853171110153198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.069508790969849, "rewards_train/margins_1": 1.434544324874878, "rewards_train/margins_2": 0.4094736576080322, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -92.39584350585938, "logps_train/policy_1_l": -214.99423217773438, "logps_train/policy_1_w": -105.46484375, "logps_train/policy_2_2": -74.18545532226562, "logps_train/policy_2_w": -140.18019104003906, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.658071756362915, "rewards_train/1-l": -2.830672264099121, "rewards_train/1-w": 3.291015625, "rewards_train/2-2": 2.3345789909362793, "rewards_train/2-w": 1.8163552284240723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.121687889099121, "rewards_train/margins_1": 1.632943868637085, "rewards_train/margins_2": 0.518223762512207, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -154.50230407714844, "logps_train/policy_1_l": -89.46558380126953, "logps_train/policy_1_w": -108.33025360107422, "logps_train/policy_2_2": -98.4488296508789, "logps_train/policy_2_w": -163.61410522460938, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -68.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.6028944253921509, "rewards_train/1-l": -2.0801525115966797, "rewards_train/1-w": 2.622053623199463, "rewards_train/2-2": 3.167616844177246, "rewards_train/2-w": 0.39171552658081055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.702206134796143, "rewards_train/margins_1": 2.019159197807312, "rewards_train/margins_2": 2.7759013175964355, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -143.06251525878906, "logps_train/policy_1_l": -142.62594604492188, "logps_train/policy_1_w": -99.48816680908203, "logps_train/policy_2_2": -104.80874633789062, "logps_train/policy_2_w": -137.84375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.7546865940093994, "rewards_train/1-l": -2.620992660522461, "rewards_train/1-w": 3.2981557846069336, "rewards_train/2-2": 3.0808444023132324, "rewards_train/2-w": 1.7272453308105469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.9191484451293945, "rewards_train/margins_1": 1.5434691905975342, "rewards_train/margins_2": 1.3535990715026855, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -123.45893859863281, "logps_train/policy_1_l": -114.20430755615234, "logps_train/policy_1_w": -118.15508270263672, "logps_train/policy_2_2": -83.80972290039062, "logps_train/policy_2_w": -170.96450805664062, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.0455119609832764, "rewards_train/1-l": -1.7620325088500977, "rewards_train/1-w": 2.8031439781188965, "rewards_train/2-2": 2.7244961261749268, "rewards_train/2-w": 0.6547223925590515, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.565176486968994, "rewards_train/margins_1": 1.7576320171356201, "rewards_train/margins_2": 2.0697737336158752, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -158.19363403320312, "logps_train/policy_1_l": -176.92718505859375, "logps_train/policy_1_w": -123.79275512695312, "logps_train/policy_2_2": -131.82119750976562, "logps_train/policy_2_w": -164.4797821044922, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.5986053943634033, "rewards_train/1-l": -3.0466244220733643, "rewards_train/1-w": 3.373849391937256, "rewards_train/2-2": 2.3795993328094482, "rewards_train/2-w": 1.5434287786483765, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.42047381401062, "rewards_train/margins_1": 1.7752439975738525, "rewards_train/margins_2": 0.8361705541610718, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -184.9441375732422, "logps_train/policy_1_l": -200.63616943359375, "logps_train/policy_1_w": -124.26350402832031, "logps_train/policy_2_2": -133.7274627685547, "logps_train/policy_2_w": -173.8775634765625, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.010274887084961, "rewards_train/1-l": -2.142913818359375, "rewards_train/1-w": 3.540837526321411, "rewards_train/2-2": 3.6936604976654053, "rewards_train/2-w": 1.3536500930786133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.683751344680786, "rewards_train/margins_1": 1.5305626392364502, "rewards_train/margins_2": 2.340010404586792, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -169.00979614257812, "logps_train/policy_1_l": -107.26789855957031, "logps_train/policy_1_w": -89.32217407226562, "logps_train/policy_2_2": -128.28468322753906, "logps_train/policy_2_w": -126.8087158203125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 2.1130833625793457, "rewards_train/1-l": -1.4267902374267578, "rewards_train/1-w": 3.4943456649780273, "rewards_train/2-2": 3.6262192726135254, "rewards_train/2-w": 2.1331911087036133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.921135902404785, "rewards_train/margins_1": 1.3812623023986816, "rewards_train/margins_2": 1.493028163909912, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -127.6451416015625, "logps_train/policy_1_l": -173.37628173828125, "logps_train/policy_1_w": -103.8780288696289, "logps_train/policy_2_2": -93.98014831542969, "logps_train/policy_2_w": -135.4327392578125, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.638611078262329, "rewards_train/1-l": -2.3188772201538086, "rewards_train/1-w": 2.985438823699951, "rewards_train/2-2": 2.4582345485687256, "rewards_train/2-w": 1.7418826818466187, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.30431604385376, "rewards_train/margins_1": 1.346827745437622, "rewards_train/margins_2": 0.7163518667221069, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -109.01563262939453, "logps_train/policy_1_l": -205.01303100585938, "logps_train/policy_1_w": -154.3464813232422, "logps_train/policy_2_2": -87.21540069580078, "logps_train/policy_2_w": -190.75851440429688, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.8843743801116943, "rewards_train/1-l": -3.0530600547790527, "rewards_train/1-w": 3.623164653778076, "rewards_train/2-2": 2.48978853225708, "rewards_train/2-w": 1.6553990840911865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.676224708557129, "rewards_train/margins_1": 1.7387902736663818, "rewards_train/margins_2": 0.8343894481658936, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -122.68601989746094, "logps_train/policy_1_l": -186.36753845214844, "logps_train/policy_1_w": -135.73919677734375, "logps_train/policy_2_2": -102.70475006103516, "logps_train/policy_2_w": -166.50575256347656, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 2.0532732009887695, "rewards_train/1-l": -2.2805042266845703, "rewards_train/1-w": 3.0690500736236572, "rewards_train/2-2": 2.685774803161621, "rewards_train/2-w": 1.9724717140197754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.3495543003082275, "rewards_train/margins_1": 1.0157768726348877, "rewards_train/margins_2": 0.7133030891418457, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -225.5522003173828, "logps_train/policy_1_l": -237.67361450195312, "logps_train/policy_1_w": -108.19776916503906, "logps_train/policy_2_2": -180.0099334716797, "logps_train/policy_2_w": -143.32177734375, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.813530445098877, "rewards_train/1-l": -3.1095480918884277, "rewards_train/1-w": 2.616941452026367, "rewards_train/2-2": 2.699007034301758, "rewards_train/2-w": 1.7186031341552734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.726489543914795, "rewards_train/margins_1": 1.8034110069274902, "rewards_train/margins_2": 0.9804039001464844, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -243.49935913085938, "logps_train/policy_1_l": -250.71502685546875, "logps_train/policy_1_w": -119.73932647705078, "logps_train/policy_2_2": -200.45880126953125, "logps_train/policy_2_w": -143.46603393554688, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -246.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 2.634437084197998, "rewards_train/1-l": -2.484783411026001, "rewards_train/1-w": 3.1284115314483643, "rewards_train/2-2": 4.44474458694458, "rewards_train/2-w": 2.2526144981384277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.613194942474365, "rewards_train/margins_1": 0.4939744472503662, "rewards_train/margins_2": 2.1921300888061523, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -139.58335876464844, "logps_train/policy_1_l": -92.06061553955078, "logps_train/policy_1_w": -75.98686981201172, "logps_train/policy_2_2": -99.33997344970703, "logps_train/policy_2_w": -104.34489440917969, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.0854144096374512, "rewards_train/1-l": -1.6403390169143677, "rewards_train/1-w": 2.312251091003418, "rewards_train/2-2": 2.8613152503967285, "rewards_train/2-w": 0.9823079109191895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9525901079177856, "rewards_train/margins_1": 1.2268366813659668, "rewards_train/margins_2": 1.879007339477539, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -230.99749755859375, "logps_train/policy_1_l": -313.70501708984375, "logps_train/policy_1_w": -217.2751007080078, "logps_train/policy_2_2": -186.4835205078125, "logps_train/policy_2_w": -287.1181640625, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -276.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -312.0, "rewards_train/1-2": 2.2127509117126465, "rewards_train/1-l": -3.761125087738037, "rewards_train/1-w": 4.960966110229492, "rewards_train/2-2": 4.709461212158203, "rewards_train/2-w": 2.5522475242614746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.72209119796753, "rewards_train/margins_1": 2.7482151985168457, "rewards_train/margins_2": 2.1572136878967285, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -208.382568359375, "logps_train/policy_1_l": -228.44712829589844, "logps_train/policy_1_w": -145.17001342773438, "logps_train/policy_2_2": -184.74575805664062, "logps_train/policy_2_w": -178.07125854492188, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.750025510787964, "rewards_train/1-l": -2.4150257110595703, "rewards_train/1-w": 3.784560203552246, "rewards_train/2-2": 3.7418313026428223, "rewards_train/2-w": 2.6256866455078125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.199585914611816, "rewards_train/margins_1": 1.0345346927642822, "rewards_train/margins_2": 1.1161446571350098, "step": 599 }, { "epoch": 1.8, "learning_rate": 1.4796544688663623e-07, "loss": 0.374, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -113.65977478027344, "logps_train/policy_1_l": -170.3668975830078, "logps_train/policy_1_w": -100.09326171875, "logps_train/policy_2_2": -86.40573120117188, "logps_train/policy_2_w": -129.54525756835938, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.0335341691970825, "rewards_train/1-l": -2.7814159393310547, "rewards_train/1-w": 2.2257328033447266, "rewards_train/2-2": 2.1979527473449707, "rewards_train/2-w": 0.9962549209594727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.007148742675781, "rewards_train/margins_1": 1.192198634147644, "rewards_train/margins_2": 1.201697826385498, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -125.79399108886719, "logps_train/policy_1_l": -183.59765625, "logps_train/policy_1_w": -108.30535888671875, "logps_train/policy_2_2": -92.78074645996094, "logps_train/policy_2_w": -165.01651000976562, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.695601761341095, "rewards_train/1-l": -1.6078133583068848, "rewards_train/1-w": 2.9377260208129883, "rewards_train/2-2": 2.1184096336364746, "rewards_train/2-w": 0.23663055896759033, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.545539379119873, "rewards_train/margins_1": 2.2421242594718933, "rewards_train/margins_2": 1.8817790746688843, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -180.59182739257812, "logps_train/policy_1_l": -246.83436584472656, "logps_train/policy_1_w": -142.23178100585938, "logps_train/policy_2_2": -139.82562255859375, "logps_train/policy_2_w": -184.13601684570312, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 2.1361303329467773, "rewards_train/1-l": -3.1217188835144043, "rewards_train/1-w": 3.565103530883789, "rewards_train/2-2": 3.7299373149871826, "rewards_train/2-w": 1.9168663024902344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.686822414398193, "rewards_train/margins_1": 1.4289731979370117, "rewards_train/margins_2": 1.8130710124969482, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -78.07129669189453, "logps_train/policy_1_l": -74.22514343261719, "logps_train/policy_1_w": -32.680049896240234, "logps_train/policy_2_2": -53.144744873046875, "logps_train/policy_2_w": -52.47344970703125, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -58.25, "logps_train/ref_1_w": -47.5, "logps_train/ref_2_2": -70.0, "logps_train/ref_2_w": -58.25, "rewards_train/1-2": 0.7934561967849731, "rewards_train/1-l": -1.5941450595855713, "rewards_train/1-w": 1.4722294807434082, "rewards_train/2-2": 1.6617947816848755, "rewards_train/2-w": 0.5698422193527222, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0663745403289795, "rewards_train/margins_1": 0.6787732839584351, "rewards_train/margins_2": 1.0919525623321533, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -169.11917114257812, "logps_train/policy_1_l": -242.11968994140625, "logps_train/policy_1_w": -178.97467041015625, "logps_train/policy_2_2": -137.58660888671875, "logps_train/policy_2_w": -228.09854125976562, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 2.538864850997925, "rewards_train/1-l": -3.122905731201172, "rewards_train/1-w": 4.059563636779785, "rewards_train/2-2": 3.6991515159606934, "rewards_train/2-w": 1.9003019332885742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.182469367980957, "rewards_train/margins_1": 1.5206987857818604, "rewards_train/margins_2": 1.7988495826721191, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -188.26736450195312, "logps_train/policy_1_l": -130.24993896484375, "logps_train/policy_1_w": -107.70853424072266, "logps_train/policy_2_2": -140.9646759033203, "logps_train/policy_2_w": -159.29486083984375, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.9935758113861084, "rewards_train/1-l": -1.1566352844238281, "rewards_train/1-w": 3.3353967666625977, "rewards_train/2-2": 3.064469337463379, "rewards_train/2-w": 1.648638367652893, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.492032051086426, "rewards_train/margins_1": 2.3418209552764893, "rewards_train/margins_2": 1.4158309698104858, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -183.0247802734375, "logps_train/policy_1_l": -246.27825927734375, "logps_train/policy_1_w": -142.01681518554688, "logps_train/policy_2_2": -153.33541870117188, "logps_train/policy_2_w": -181.85415649414062, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 2.4006471633911133, "rewards_train/1-l": -2.708683490753174, "rewards_train/1-w": 3.953005790710449, "rewards_train/2-2": 3.853957176208496, "rewards_train/2-w": 2.156771183013916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.661689281463623, "rewards_train/margins_1": 1.552358627319336, "rewards_train/margins_2": 1.69718599319458, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -165.737060546875, "logps_train/policy_1_l": -243.3450469970703, "logps_train/policy_1_w": -133.125244140625, "logps_train/policy_2_2": -125.37675476074219, "logps_train/policy_2_w": -169.35556030273438, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.7028567790985107, "rewards_train/1-l": -2.2874341011047363, "rewards_train/1-w": 3.43356990814209, "rewards_train/2-2": 2.7998251914978027, "rewards_train/2-w": 1.5700089931488037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.721004009246826, "rewards_train/margins_1": 1.730713129043579, "rewards_train/margins_2": 1.229816198348999, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -119.28740692138672, "logps_train/policy_1_l": -96.28239440917969, "logps_train/policy_1_w": -97.69851684570312, "logps_train/policy_2_2": -84.39368438720703, "logps_train/policy_2_w": -135.44793701171875, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.3821966648101807, "rewards_train/1-l": -1.6426923274993896, "rewards_train/1-w": 3.2379610538482666, "rewards_train/2-2": 2.6340689659118652, "rewards_train/2-w": 1.3192687034606934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.880653381347656, "rewards_train/margins_1": 1.855764389038086, "rewards_train/margins_2": 1.3148002624511719, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -95.98483276367188, "logps_train/policy_1_l": -144.874755859375, "logps_train/policy_1_w": -80.7643051147461, "logps_train/policy_2_2": -68.38327026367188, "logps_train/policy_2_w": -115.99677276611328, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.9296423196792603, "rewards_train/1-l": -3.156616687774658, "rewards_train/1-w": 2.3860695362091064, "rewards_train/2-2": 1.8784698247909546, "rewards_train/2-w": 0.8909480571746826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.542686223983765, "rewards_train/margins_1": 1.4564272165298462, "rewards_train/margins_2": 0.987521767616272, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -94.76844787597656, "logps_train/policy_1_l": -124.28119659423828, "logps_train/policy_1_w": -89.37844848632812, "logps_train/policy_2_2": -68.04954528808594, "logps_train/policy_2_w": -130.62042236328125, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 1.4715925455093384, "rewards_train/1-l": -2.4050729274749756, "rewards_train/1-w": 3.351217269897461, "rewards_train/2-2": 2.4942641258239746, "rewards_train/2-w": 1.812957525253296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.7562901973724365, "rewards_train/margins_1": 1.8796247243881226, "rewards_train/margins_2": 0.6813066005706787, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -128.83944702148438, "logps_train/policy_1_l": -167.30032348632812, "logps_train/policy_1_w": -104.5882568359375, "logps_train/policy_2_2": -94.2420883178711, "logps_train/policy_2_w": -141.78944396972656, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.2832417488098145, "rewards_train/1-l": -2.3456571102142334, "rewards_train/1-w": 2.9872686862945557, "rewards_train/2-2": 2.5539159774780273, "rewards_train/2-w": 1.729649543762207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.332925796508789, "rewards_train/margins_1": 1.7040269374847412, "rewards_train/margins_2": 0.8242664337158203, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -223.66949462890625, "logps_train/policy_1_l": -243.03573608398438, "logps_train/policy_1_w": -104.73343658447266, "logps_train/policy_2_2": -191.60488891601562, "logps_train/policy_2_w": -137.0079345703125, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 2.015862464904785, "rewards_train/1-l": -3.7074806690216064, "rewards_train/1-w": 3.0680627822875977, "rewards_train/2-2": 3.5402917861938477, "rewards_train/2-w": 1.6716669797897339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.775543451309204, "rewards_train/margins_1": 1.0522003173828125, "rewards_train/margins_2": 1.8686248064041138, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -191.0089874267578, "logps_train/policy_1_l": -198.43406677246094, "logps_train/policy_1_w": -155.1549530029297, "logps_train/policy_2_2": -148.0895538330078, "logps_train/policy_2_w": -199.38414001464844, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 1.6272268295288086, "rewards_train/1-l": -2.144578456878662, "rewards_train/1-w": 4.043879508972168, "rewards_train/2-2": 3.420731544494629, "rewards_train/2-w": 2.3897109031677246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.18845796585083, "rewards_train/margins_1": 2.4166526794433594, "rewards_train/margins_2": 1.0310206413269043, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -245.55316162109375, "logps_train/policy_1_l": -175.12881469726562, "logps_train/policy_1_w": -129.478759765625, "logps_train/policy_2_2": -175.51683044433594, "logps_train/policy_2_w": -183.8441162109375, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.0087472200393677, "rewards_train/1-l": -1.9968669414520264, "rewards_train/1-w": 3.5458736419677734, "rewards_train/2-2": 3.771754264831543, "rewards_train/2-w": 1.3655874729156494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5427405834198, "rewards_train/margins_1": 2.5371264219284058, "rewards_train/margins_2": 2.4061667919158936, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -102.76335144042969, "logps_train/policy_1_l": -108.08932495117188, "logps_train/policy_1_w": -77.18327331542969, "logps_train/policy_2_2": -69.71134185791016, "logps_train/policy_2_w": -105.88009643554688, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.2451491355895996, "rewards_train/1-l": -1.3638153076171875, "rewards_train/1-w": 2.373274326324463, "rewards_train/2-2": 2.362849712371826, "rewards_train/2-w": 1.2010531425476074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7370896339416504, "rewards_train/margins_1": 1.1281251907348633, "rewards_train/margins_2": 1.1617965698242188, "step": 601 }, { "epoch": 1.8, "learning_rate": 1.3970960103457722e-07, "loss": 0.3914, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -153.26690673828125, "logps_train/policy_1_l": -121.90319061279297, "logps_train/policy_1_w": -89.85924530029297, "logps_train/policy_2_2": -106.88988494873047, "logps_train/policy_2_w": -132.60140991210938, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.4811216592788696, "rewards_train/1-l": -0.9993429183959961, "rewards_train/1-w": 3.3625130653381348, "rewards_train/2-2": 2.9774179458618164, "rewards_train/2-w": 1.875014066696167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.361855983734131, "rewards_train/margins_1": 1.8813914060592651, "rewards_train/margins_2": 1.1024038791656494, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -176.29928588867188, "logps_train/policy_1_l": -161.9877471923828, "logps_train/policy_1_w": -167.38632202148438, "logps_train/policy_2_2": -143.41046142578125, "logps_train/policy_2_w": -199.24862670898438, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 2.7263224124908447, "rewards_train/1-l": -2.5042431354522705, "rewards_train/1-w": 4.389492988586426, "rewards_train/2-2": 3.7652029991149902, "rewards_train/2-w": 2.9626381397247314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.893736124038696, "rewards_train/margins_1": 1.663170576095581, "rewards_train/margins_2": 0.8025648593902588, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -80.56024169921875, "logps_train/policy_1_l": -73.21261596679688, "logps_train/policy_1_w": -70.60643768310547, "logps_train/policy_2_2": -59.488792419433594, "logps_train/policy_2_w": -100.81736755371094, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -58.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -74.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.6451476812362671, "rewards_train/1-l": -1.4614956378936768, "rewards_train/1-w": 3.0424811840057373, "rewards_train/2-2": 1.5030739307403564, "rewards_train/2-w": 1.3151379823684692, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.503976821899414, "rewards_train/margins_1": 2.39733350276947, "rewards_train/margins_2": 0.1879359483718872, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -86.80827331542969, "logps_train/policy_1_l": -278.99786376953125, "logps_train/policy_1_w": -74.909423828125, "logps_train/policy_2_2": -69.35244750976562, "logps_train/policy_2_w": -101.06004333496094, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -245.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": 1.32229745388031, "rewards_train/1-l": -3.4388482570648193, "rewards_train/1-w": 2.8215579986572266, "rewards_train/2-2": 1.9374120235443115, "rewards_train/2-w": 1.831496000289917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.260406255722046, "rewards_train/margins_1": 1.4992605447769165, "rewards_train/margins_2": 0.10591602325439453, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -244.57040405273438, "logps_train/policy_1_l": -199.55169677734375, "logps_train/policy_1_w": -210.77517700195312, "logps_train/policy_2_2": -210.955810546875, "logps_train/policy_2_w": -253.83865356445312, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -252.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.6253819465637207, "rewards_train/1-l": -1.8534104824066162, "rewards_train/1-w": 4.162423133850098, "rewards_train/2-2": 3.982543468475342, "rewards_train/2-w": 2.1249234676361084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.015833616256714, "rewards_train/margins_1": 1.537041187286377, "rewards_train/margins_2": 1.8576200008392334, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -242.95196533203125, "logps_train/policy_1_l": -202.79429626464844, "logps_train/policy_1_w": -110.22633361816406, "logps_train/policy_2_2": -195.59298706054688, "logps_train/policy_2_w": -147.13671875, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.4016793966293335, "rewards_train/1-l": -3.454429864883423, "rewards_train/1-w": 2.799241542816162, "rewards_train/2-2": 3.629763603210449, "rewards_train/2-w": 1.9613291025161743, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.253671407699585, "rewards_train/margins_1": 1.3975621461868286, "rewards_train/margins_2": 1.668434500694275, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -185.6514892578125, "logps_train/policy_1_l": -171.55438232421875, "logps_train/policy_1_w": -136.56565856933594, "logps_train/policy_2_2": -150.37701416015625, "logps_train/policy_2_w": -168.74176025390625, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.8176636695861816, "rewards_train/1-l": -1.8737972974777222, "rewards_train/1-w": 3.2887468338012695, "rewards_train/2-2": 3.3044867515563965, "rewards_train/2-w": 2.007073402404785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.162544131278992, "rewards_train/margins_1": 1.471083164215088, "rewards_train/margins_2": 1.2974133491516113, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -165.32037353515625, "logps_train/policy_1_l": -166.85427856445312, "logps_train/policy_1_w": -118.34687805175781, "logps_train/policy_2_2": -132.4369354248047, "logps_train/policy_2_w": -149.562744140625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 2.3257741928100586, "rewards_train/1-l": -1.6573030948638916, "rewards_train/1-w": 3.2449986934661865, "rewards_train/2-2": 3.192927837371826, "rewards_train/2-w": 2.099097728729248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.902301788330078, "rewards_train/margins_1": 0.9192245006561279, "rewards_train/margins_2": 1.0938301086425781, "step": 602 }, { "epoch": 1.81, "logps_train/policy_1_2": -130.773681640625, "logps_train/policy_1_l": -182.3203582763672, "logps_train/policy_1_w": -152.07420349121094, "logps_train/policy_2_2": -90.13694763183594, "logps_train/policy_2_w": -216.8367919921875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.3030998706817627, "rewards_train/1-l": -1.2757863998413086, "rewards_train/1-w": 3.581641674041748, "rewards_train/2-2": 2.5675549507141113, "rewards_train/2-w": 0.9100706577301025, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.857428073883057, "rewards_train/margins_1": 2.2785418033599854, "rewards_train/margins_2": 1.6574842929840088, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -146.37637329101562, "logps_train/policy_1_l": -88.55574035644531, "logps_train/policy_1_w": -42.224205017089844, "logps_train/policy_2_2": -100.20332336425781, "logps_train/policy_2_w": -79.0252685546875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -76.0, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -89.0, "rewards_train/1-2": 0.7522059679031372, "rewards_train/1-l": -1.297370433807373, "rewards_train/1-w": 2.3635168075561523, "rewards_train/2-2": 2.709355354309082, "rewards_train/2-w": 0.9974727034568787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6608872413635254, "rewards_train/margins_1": 1.6113108396530151, "rewards_train/margins_2": 1.7118826508522034, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -230.32937622070312, "logps_train/policy_1_l": -272.50897216796875, "logps_train/policy_1_w": -147.26809692382812, "logps_train/policy_2_2": -169.21177673339844, "logps_train/policy_2_w": -186.78070068359375, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.97487473487854, "rewards_train/1-l": -3.6727728843688965, "rewards_train/1-w": 3.3317837715148926, "rewards_train/2-2": 4.506947040557861, "rewards_train/2-w": 2.0281803607940674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.004556655883789, "rewards_train/margins_1": 1.3569090366363525, "rewards_train/margins_2": 2.478766679763794, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -181.53172302246094, "logps_train/policy_1_l": -159.03338623046875, "logps_train/policy_1_w": -118.49742126464844, "logps_train/policy_2_2": -150.05224609375, "logps_train/policy_2_w": -162.93020629882812, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.4624522924423218, "rewards_train/1-l": -2.0627126693725586, "rewards_train/1-w": 4.2049455642700195, "rewards_train/2-2": 2.7728993892669678, "rewards_train/2-w": 2.281980037689209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.267658233642578, "rewards_train/margins_1": 2.7424932718276978, "rewards_train/margins_2": 0.4909193515777588, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -239.59909057617188, "logps_train/policy_1_l": -117.68463134765625, "logps_train/policy_1_w": -135.7548828125, "logps_train/policy_2_2": -179.13436889648438, "logps_train/policy_2_w": -172.97549438476562, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 0.46821701526641846, "rewards_train/1-l": -1.250884771347046, "rewards_train/1-w": 3.633106231689453, "rewards_train/2-2": 3.7912495136260986, "rewards_train/2-w": 2.079012393951416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.883991003036499, "rewards_train/margins_1": 3.1648892164230347, "rewards_train/margins_2": 1.7122371196746826, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -178.94171142578125, "logps_train/policy_1_l": -254.5523223876953, "logps_train/policy_1_w": -179.6239013671875, "logps_train/policy_2_2": -147.45184326171875, "logps_train/policy_2_w": -222.2183837890625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 2.252703905105591, "rewards_train/1-l": -2.2411696910858154, "rewards_train/1-w": 4.118860244750977, "rewards_train/2-2": 3.2516918182373047, "rewards_train/2-w": 1.9969099760055542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.360029935836792, "rewards_train/margins_1": 1.8661563396453857, "rewards_train/margins_2": 1.2547818422317505, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -160.1648406982422, "logps_train/policy_1_l": -121.95912170410156, "logps_train/policy_1_w": -144.1965789794922, "logps_train/policy_2_2": -114.32533264160156, "logps_train/policy_2_w": -197.70591735839844, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.6647659540176392, "rewards_train/1-l": -1.8216931819915771, "rewards_train/1-w": 4.397529125213623, "rewards_train/2-2": 3.226841688156128, "rewards_train/2-w": 1.8700332641601562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.2192223072052, "rewards_train/margins_1": 2.732763171195984, "rewards_train/margins_2": 1.3568084239959717, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -168.54989624023438, "logps_train/policy_1_l": -198.97537231445312, "logps_train/policy_1_w": -159.666015625, "logps_train/policy_2_2": -139.6126251220703, "logps_train/policy_2_w": -200.17547607421875, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.889151930809021, "rewards_train/1-l": -1.7688260078430176, "rewards_train/1-w": 3.7005856037139893, "rewards_train/2-2": 2.9199862480163574, "rewards_train/2-w": 2.038703441619873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.469411611557007, "rewards_train/margins_1": 1.8114336729049683, "rewards_train/margins_2": 0.8812828063964844, "step": 603 }, { "epoch": 1.81, "learning_rate": 1.3168411536452153e-07, "loss": 0.3554, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -224.43186950683594, "logps_train/policy_1_l": -193.3361358642578, "logps_train/policy_1_w": -97.44902038574219, "logps_train/policy_2_2": -180.322998046875, "logps_train/policy_2_w": -123.22115325927734, "logps_train/ref_1_2": -244.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.9503687620162964, "rewards_train/1-l": -2.7307815551757812, "rewards_train/1-w": 2.738692045211792, "rewards_train/2-2": 3.2192625999450684, "rewards_train/2-w": 2.1317903995513916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.469473600387573, "rewards_train/margins_1": 0.7883232831954956, "rewards_train/margins_2": 1.0874722003936768, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -111.67803955078125, "logps_train/policy_1_l": -134.17010498046875, "logps_train/policy_1_w": -59.15595626831055, "logps_train/policy_2_2": -84.83672332763672, "logps_train/policy_2_w": -84.62971496582031, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.6243836879730225, "rewards_train/1-l": -2.2806332111358643, "rewards_train/1-w": 2.5480761528015137, "rewards_train/2-2": 2.5397653579711914, "rewards_train/2-w": 1.3339041471481323, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.828709363937378, "rewards_train/margins_1": 0.9236924648284912, "rewards_train/margins_2": 1.205861210823059, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -172.38046264648438, "logps_train/policy_1_l": -229.82835388183594, "logps_train/policy_1_w": -183.77789306640625, "logps_train/policy_2_2": -142.85858154296875, "logps_train/policy_2_w": -226.74966430664062, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 2.2900784015655518, "rewards_train/1-l": -3.2394747734069824, "rewards_train/1-w": 3.62455415725708, "rewards_train/2-2": 3.358672618865967, "rewards_train/2-w": 1.8062829971313477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.8640289306640625, "rewards_train/margins_1": 1.3344757556915283, "rewards_train/margins_2": 1.5523896217346191, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -199.04168701171875, "logps_train/policy_1_l": -136.7832489013672, "logps_train/policy_1_w": -75.74258422851562, "logps_train/policy_2_2": -149.81036376953125, "logps_train/policy_2_w": -110.99485778808594, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.3614557981491089, "rewards_train/1-l": -1.4291062355041504, "rewards_train/1-w": 2.4741790294647217, "rewards_train/2-2": 3.6119322776794434, "rewards_train/2-w": 1.3169198036193848, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.903285264968872, "rewards_train/margins_1": 1.1127232313156128, "rewards_train/margins_2": 2.2950124740600586, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -204.45156860351562, "logps_train/policy_1_l": -197.146728515625, "logps_train/policy_1_w": -126.1720199584961, "logps_train/policy_2_2": -145.89996337890625, "logps_train/policy_2_w": -183.53677368164062, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.4282796382904053, "rewards_train/1-l": -1.7201417684555054, "rewards_train/1-w": 3.8734230995178223, "rewards_train/2-2": 3.69203519821167, "rewards_train/2-w": 1.4556975364685059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.593564867973328, "rewards_train/margins_1": 2.445143461227417, "rewards_train/margins_2": 2.236337661743164, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -172.3889617919922, "logps_train/policy_1_l": -206.24777221679688, "logps_train/policy_1_w": -139.1488800048828, "logps_train/policy_2_2": -118.53580474853516, "logps_train/policy_2_w": -193.34036254882812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 1.3861031532287598, "rewards_train/1-l": -2.691964626312256, "rewards_train/1-w": 3.2003464698791504, "rewards_train/2-2": 3.2776694297790527, "rewards_train/2-w": 0.77143394947052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.892311096191406, "rewards_train/margins_1": 1.8142433166503906, "rewards_train/margins_2": 2.5062354803085327, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -172.39830017089844, "logps_train/policy_1_l": -159.73648071289062, "logps_train/policy_1_w": -127.89447021484375, "logps_train/policy_2_2": -133.49639892578125, "logps_train/policy_2_w": -170.08197021484375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.9914205074310303, "rewards_train/1-l": -1.734975814819336, "rewards_train/1-w": 3.6687567234039307, "rewards_train/2-2": 3.4941093921661377, "rewards_train/2-w": 2.0871145725250244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.403732538223267, "rewards_train/margins_1": 1.6773362159729004, "rewards_train/margins_2": 1.4069948196411133, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -94.62451171875, "logps_train/policy_1_l": -100.78504943847656, "logps_train/policy_1_w": -57.715694427490234, "logps_train/policy_2_2": -74.82520294189453, "logps_train/policy_2_w": -90.26083374023438, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": 0.9391113519668579, "rewards_train/1-l": -1.832411766052246, "rewards_train/1-w": 1.8130006790161133, "rewards_train/2-2": 1.6440415382385254, "rewards_train/2-w": 0.6207913160324097, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6454124450683594, "rewards_train/margins_1": 0.8738893270492554, "rewards_train/margins_2": 1.0232502222061157, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -216.87799072265625, "logps_train/policy_1_l": -157.8863525390625, "logps_train/policy_1_w": -105.33634185791016, "logps_train/policy_2_2": -161.63320922851562, "logps_train/policy_2_w": -160.85067749023438, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.0122015476226807, "rewards_train/1-l": -1.5575803518295288, "rewards_train/1-w": 3.2226152420043945, "rewards_train/2-2": 4.646055221557617, "rewards_train/2-w": 1.38993239402771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.780195593833923, "rewards_train/margins_1": 1.2104136943817139, "rewards_train/margins_2": 3.2561228275299072, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -197.27462768554688, "logps_train/policy_1_l": -188.83023071289062, "logps_train/policy_1_w": -179.0859375, "logps_train/policy_2_2": -157.6629638671875, "logps_train/policy_2_w": -217.56756591796875, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 2.1975369453430176, "rewards_train/1-l": -2.1392736434936523, "rewards_train/1-w": 3.8539061546325684, "rewards_train/2-2": 3.639953136444092, "rewards_train/2-w": 2.136993408203125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.993179798126221, "rewards_train/margins_1": 1.6563692092895508, "rewards_train/margins_2": 1.5029597282409668, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -43.45148468017578, "logps_train/policy_1_l": -44.99571228027344, "logps_train/policy_1_w": -38.64221954345703, "logps_train/policy_2_2": -34.62834930419922, "logps_train/policy_2_w": -53.060760498046875, "logps_train/ref_1_2": -53.0, "logps_train/ref_1_l": -34.25, "logps_train/ref_1_w": -54.75, "logps_train/ref_2_2": -48.5, "logps_train/ref_2_w": -62.0, "rewards_train/1-2": 0.9478199481964111, "rewards_train/1-l": -1.0726184844970703, "rewards_train/1-w": 1.6147818565368652, "rewards_train/2-2": 1.3916573524475098, "rewards_train/2-w": 0.9111117124557495, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.6874003410339355, "rewards_train/margins_1": 0.6669619083404541, "rewards_train/margins_2": 0.48054563999176025, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -112.95350646972656, "logps_train/policy_1_l": -63.98960876464844, "logps_train/policy_1_w": -36.80127716064453, "logps_train/policy_2_2": -83.57249450683594, "logps_train/policy_2_w": -57.995452880859375, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -46.0, "logps_train/ref_1_w": -57.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -70.0, "rewards_train/1-2": 0.8155869841575623, "rewards_train/1-l": -1.8044297695159912, "rewards_train/1-w": 2.063622236251831, "rewards_train/2-2": 2.5255630016326904, "rewards_train/2-w": 1.2246736288070679, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8680520057678223, "rewards_train/margins_1": 1.2480352520942688, "rewards_train/margins_2": 1.3008893728256226, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -159.66375732421875, "logps_train/policy_1_l": -206.47872924804688, "logps_train/policy_1_w": -183.08074951171875, "logps_train/policy_2_2": -121.79507446289062, "logps_train/policy_2_w": -243.71554565429688, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -222.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -262.0, "rewards_train/1-2": 2.302372932434082, "rewards_train/1-l": -1.7735384702682495, "rewards_train/1-w": 3.952861785888672, "rewards_train/2-2": 3.454087018966675, "rewards_train/2-w": 1.8096957206726074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.726400256156921, "rewards_train/margins_1": 1.6504888534545898, "rewards_train/margins_2": 1.6443912982940674, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -196.54354858398438, "logps_train/policy_1_l": -175.46832275390625, "logps_train/policy_1_w": -132.90463256835938, "logps_train/policy_2_2": -152.65162658691406, "logps_train/policy_2_w": -179.45626831054688, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.7188860177993774, "rewards_train/1-l": -1.7562062740325928, "rewards_train/1-w": 3.501333475112915, "rewards_train/2-2": 3.377610683441162, "rewards_train/2-w": 1.8672645092010498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.257539749145508, "rewards_train/margins_1": 1.7824474573135376, "rewards_train/margins_2": 1.5103461742401123, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -234.16519165039062, "logps_train/policy_1_l": -183.99896240234375, "logps_train/policy_1_w": -151.3567657470703, "logps_train/policy_2_2": -168.77191162109375, "logps_train/policy_2_w": -201.85009765625, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.922544002532959, "rewards_train/1-l": -1.931145429611206, "rewards_train/1-w": 3.351823329925537, "rewards_train/2-2": 4.052496910095215, "rewards_train/2-w": 1.5399909019470215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.282968759536743, "rewards_train/margins_1": 2.429279327392578, "rewards_train/margins_2": 2.5125060081481934, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -100.97715759277344, "logps_train/policy_1_l": -77.67345428466797, "logps_train/policy_1_w": -91.89114379882812, "logps_train/policy_2_2": -67.39469146728516, "logps_train/policy_2_w": -122.72240447998047, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.4061908721923828, "rewards_train/1-l": -1.4048449993133545, "rewards_train/1-w": 3.1976051330566406, "rewards_train/2-2": 2.6831870079040527, "rewards_train/2-w": 2.0847904682159424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.602450132369995, "rewards_train/margins_1": 1.7914142608642578, "rewards_train/margins_2": 0.5983965396881104, "step": 605 }, { "epoch": 1.81, "learning_rate": 1.238897731497224e-07, "loss": 0.3633, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -166.6841583251953, "logps_train/policy_1_l": -171.152587890625, "logps_train/policy_1_w": -109.39875030517578, "logps_train/policy_2_2": -130.31182861328125, "logps_train/policy_2_w": -151.3473663330078, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.178459644317627, "rewards_train/1-l": -1.6586169004440308, "rewards_train/1-w": 2.8253591060638428, "rewards_train/2-2": 2.6172542572021484, "rewards_train/2-w": 1.20510733127594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4839760065078735, "rewards_train/margins_1": 1.6468994617462158, "rewards_train/margins_2": 1.4121469259262085, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -152.95033264160156, "logps_train/policy_1_l": -166.88165283203125, "logps_train/policy_1_w": -107.27964782714844, "logps_train/policy_2_2": -113.74740600585938, "logps_train/policy_2_w": -145.90647888183594, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 2.364341974258423, "rewards_train/1-l": -1.7037907838821411, "rewards_train/1-w": 4.214223384857178, "rewards_train/2-2": 3.7502591609954834, "rewards_train/2-w": 2.4249773025512695, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.918014168739319, "rewards_train/margins_1": 1.8498814105987549, "rewards_train/margins_2": 1.3252818584442139, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -178.3680419921875, "logps_train/policy_1_l": -148.04522705078125, "logps_train/policy_1_w": -110.71597290039062, "logps_train/policy_2_2": -134.0509033203125, "logps_train/policy_2_w": -147.21163940429688, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.8821420669555664, "rewards_train/1-l": -2.072491407394409, "rewards_train/1-w": 3.140120506286621, "rewards_train/2-2": 3.0343620777130127, "rewards_train/2-w": 1.5807883739471436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.21261191368103, "rewards_train/margins_1": 2.2579784393310547, "rewards_train/margins_2": 1.4535737037658691, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -143.5430145263672, "logps_train/policy_1_l": -141.34019470214844, "logps_train/policy_1_w": -73.34256744384766, "logps_train/policy_2_2": -97.17083740234375, "logps_train/policy_2_w": -104.27545166015625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": 0.650385856628418, "rewards_train/1-l": -1.975034475326538, "rewards_train/1-w": 2.4571492671966553, "rewards_train/2-2": 2.2114317417144775, "rewards_train/2-w": 1.5068295001983643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.432183742523193, "rewards_train/margins_1": 1.8067634105682373, "rewards_train/margins_2": 0.7046022415161133, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -206.10585021972656, "logps_train/policy_1_l": -202.8150634765625, "logps_train/policy_1_w": -114.87355041503906, "logps_train/policy_2_2": -167.54640197753906, "logps_train/policy_2_w": -150.9908447265625, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.9144147634506226, "rewards_train/1-l": -2.628723621368408, "rewards_train/1-w": 3.456394672393799, "rewards_train/2-2": 3.571922540664673, "rewards_train/2-w": 2.1274771690368652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.085118293762207, "rewards_train/margins_1": 1.5419799089431763, "rewards_train/margins_2": 1.4444453716278076, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -116.70201110839844, "logps_train/policy_1_l": -105.19427490234375, "logps_train/policy_1_w": -49.397281646728516, "logps_train/policy_2_2": -76.53985595703125, "logps_train/policy_2_w": -73.68030548095703, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -83.0, "rewards_train/1-2": 1.1454243659973145, "rewards_train/1-l": -2.8502869606018066, "rewards_train/1-w": 1.7475764751434326, "rewards_train/2-2": 2.649139165878296, "rewards_train/2-w": 0.909313440322876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.597863435745239, "rewards_train/margins_1": 0.6021521091461182, "rewards_train/margins_2": 1.73982572555542, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -109.0264892578125, "logps_train/policy_1_l": -123.99668884277344, "logps_train/policy_1_w": -95.55278015136719, "logps_train/policy_2_2": -81.32655334472656, "logps_train/policy_2_w": -127.56172180175781, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.0395379066467285, "rewards_train/1-l": -1.5684187412261963, "rewards_train/1-w": 2.665816068649292, "rewards_train/2-2": 1.922813057899475, "rewards_train/2-w": 1.2532029151916504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.234234809875488, "rewards_train/margins_1": 1.6262781620025635, "rewards_train/margins_2": 0.6696101427078247, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -169.15333557128906, "logps_train/policy_1_l": -169.3594970703125, "logps_train/policy_1_w": -88.37515258789062, "logps_train/policy_2_2": -127.68569946289062, "logps_train/policy_2_w": -129.29502868652344, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.2252916097640991, "rewards_train/1-l": -3.07266902923584, "rewards_train/1-w": 3.1046721935272217, "rewards_train/2-2": 3.0587735176086426, "rewards_train/2-w": 1.5829968452453613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1773412227630615, "rewards_train/margins_1": 1.8793805837631226, "rewards_train/margins_2": 1.4757766723632812, "step": 606 }, { "epoch": 1.82, "logps_train/policy_1_2": -73.04039764404297, "logps_train/policy_1_l": -75.8508529663086, "logps_train/policy_1_w": -64.10932922363281, "logps_train/policy_2_2": -59.27092361450195, "logps_train/policy_2_w": -92.33123779296875, "logps_train/ref_1_2": -82.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.9104136824607849, "rewards_train/1-l": -1.0255153179168701, "rewards_train/1-w": 2.826176643371582, "rewards_train/2-2": 1.765290379524231, "rewards_train/2-w": 1.6594548225402832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.851691961288452, "rewards_train/margins_1": 1.9157629609107971, "rewards_train/margins_2": 0.10583555698394775, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -54.00349426269531, "logps_train/policy_1_l": -151.55569458007812, "logps_train/policy_1_w": -98.60038757324219, "logps_train/policy_2_2": -44.97551727294922, "logps_train/policy_2_w": -129.10791015625, "logps_train/ref_1_2": -67.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -59.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.2706959247589111, "rewards_train/1-l": -2.4818761348724365, "rewards_train/1-w": 2.763629913330078, "rewards_train/2-2": 1.4246159791946411, "rewards_train/2-w": 1.1540533304214478, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.245506048202515, "rewards_train/margins_1": 1.492933988571167, "rewards_train/margins_2": 0.27056264877319336, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -110.21224975585938, "logps_train/policy_1_l": -57.83583068847656, "logps_train/policy_1_w": -48.05359649658203, "logps_train/policy_2_2": -64.3382568359375, "logps_train/policy_2_w": -84.13806915283203, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -45.0, "logps_train/ref_1_w": -65.5, "logps_train/ref_2_2": -94.5, "logps_train/ref_2_w": -88.5, "rewards_train/1-2": 1.21158766746521, "rewards_train/1-l": -1.283583164215088, "rewards_train/1-w": 1.7395622730255127, "rewards_train/2-2": 3.0114872455596924, "rewards_train/2-w": 0.4432240426540375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0231454372406006, "rewards_train/margins_1": 0.5279746055603027, "rewards_train/margins_2": 2.568263202905655, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -149.4683837890625, "logps_train/policy_1_l": -224.4359130859375, "logps_train/policy_1_w": -145.44265747070312, "logps_train/policy_2_2": -109.94673156738281, "logps_train/policy_2_w": -199.6541748046875, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.8375369310379028, "rewards_train/1-l": -3.28109073638916, "rewards_train/1-w": 3.583859443664551, "rewards_train/2-2": 3.123685836791992, "rewards_train/2-w": 1.215832233428955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.864950180053711, "rewards_train/margins_1": 1.746322512626648, "rewards_train/margins_2": 1.907853603363037, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -116.7461166381836, "logps_train/policy_1_l": -109.98076629638672, "logps_train/policy_1_w": -79.14043426513672, "logps_train/policy_2_2": -99.24222564697266, "logps_train/policy_2_w": -95.80372619628906, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.7660131454467773, "rewards_train/1-l": -1.4219045639038086, "rewards_train/1-w": 2.3711133003234863, "rewards_train/2-2": 2.474215030670166, "rewards_train/2-w": 1.7930643558502197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.793017864227295, "rewards_train/margins_1": 0.605100154876709, "rewards_train/margins_2": 0.6811506748199463, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -192.91586303710938, "logps_train/policy_1_l": -150.74681091308594, "logps_train/policy_1_w": -116.32125854492188, "logps_train/policy_2_2": -163.69650268554688, "logps_train/policy_2_w": -152.89498901367188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.6881020069122314, "rewards_train/1-l": -2.4735095500946045, "rewards_train/1-w": 2.940530776977539, "rewards_train/2-2": 3.02097487449646, "rewards_train/2-w": 1.4620634317398071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.4140403270721436, "rewards_train/margins_1": 1.2524287700653076, "rewards_train/margins_2": 1.5589114427566528, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -218.7932891845703, "logps_train/policy_1_l": -185.6151123046875, "logps_train/policy_1_w": -140.68963623046875, "logps_train/policy_2_2": -186.21963500976562, "logps_train/policy_2_w": -199.38714599609375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.698211431503296, "rewards_train/1-l": -2.7124876976013184, "rewards_train/1-w": 4.246662139892578, "rewards_train/2-2": 3.156161308288574, "rewards_train/2-w": 1.7831611633300781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.9591498374938965, "rewards_train/margins_1": 2.5484507083892822, "rewards_train/margins_2": 1.373000144958496, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -149.2696990966797, "logps_train/policy_1_l": -189.65380859375, "logps_train/policy_1_w": -103.6530990600586, "logps_train/policy_2_2": -124.33255767822266, "logps_train/policy_2_w": -127.89742279052734, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.8433421850204468, "rewards_train/1-l": -1.671631097793579, "rewards_train/1-w": 3.3026585578918457, "rewards_train/2-2": 2.6151812076568604, "rewards_train/2-w": 2.583695888519287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.974289655685425, "rewards_train/margins_1": 1.459316372871399, "rewards_train/margins_2": 0.03148531913757324, "step": 607 }, { "epoch": 1.82, "learning_rate": 1.1632733510423932e-07, "loss": 0.3962, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -171.65939331054688, "logps_train/policy_1_l": -142.01419067382812, "logps_train/policy_1_w": -138.39755249023438, "logps_train/policy_2_2": -131.4810791015625, "logps_train/policy_2_w": -167.99310302734375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.0778114795684814, "rewards_train/1-l": -1.9773950576782227, "rewards_train/1-w": 3.1621978282928467, "rewards_train/2-2": 2.7456421852111816, "rewards_train/2-w": 2.0422916412353516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.139592885971069, "rewards_train/margins_1": 2.0843863487243652, "rewards_train/margins_2": 0.7033505439758301, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -166.4536895751953, "logps_train/policy_1_l": -221.8505096435547, "logps_train/policy_1_w": -108.10914611816406, "logps_train/policy_2_2": -115.38735961914062, "logps_train/policy_2_w": -157.36941528320312, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.7702556848526, "rewards_train/1-l": -2.9885425567626953, "rewards_train/1-w": 3.2904772758483887, "rewards_train/2-2": 3.5800139904022217, "rewards_train/2-w": 1.8407435417175293, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.279019832611084, "rewards_train/margins_1": 1.5202215909957886, "rewards_train/margins_2": 1.7392704486846924, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -45.94952392578125, "logps_train/policy_1_l": -16.30049705505371, "logps_train/policy_1_w": -46.24745559692383, "logps_train/policy_2_2": -30.085857391357422, "logps_train/policy_2_w": -67.66978454589844, "logps_train/ref_1_2": -50.0, "logps_train/ref_1_l": -9.25, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -41.5, "logps_train/ref_2_w": -71.0, "rewards_train/1-2": 0.3866885304450989, "rewards_train/1-l": -0.7067586183547974, "rewards_train/1-w": 1.2623640298843384, "rewards_train/2-2": 1.1625081300735474, "rewards_train/2-w": 0.3158341646194458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9691226482391357, "rewards_train/margins_1": 0.8756754994392395, "rewards_train/margins_2": 0.8466739654541016, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -146.83529663085938, "logps_train/policy_1_l": -133.23800659179688, "logps_train/policy_1_w": -155.26959228515625, "logps_train/policy_2_2": -113.24825286865234, "logps_train/policy_2_w": -199.87188720703125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.607290506362915, "rewards_train/1-l": -1.634934425354004, "rewards_train/1-w": 2.393744468688965, "rewards_train/2-2": 3.2445106506347656, "rewards_train/2-w": 0.6331231594085693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.028678894042969, "rewards_train/margins_1": 0.7864539623260498, "rewards_train/margins_2": 2.6113874912261963, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -159.18157958984375, "logps_train/policy_1_l": -143.51797485351562, "logps_train/policy_1_w": -119.69573974609375, "logps_train/policy_2_2": -119.77072143554688, "logps_train/policy_2_w": -168.80592346191406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.9187568426132202, "rewards_train/1-l": -1.5060935020446777, "rewards_train/1-w": 3.6460506916046143, "rewards_train/2-2": 2.895975112915039, "rewards_train/2-w": 1.8026111125946045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.152144193649292, "rewards_train/margins_1": 1.727293848991394, "rewards_train/margins_2": 1.0933640003204346, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -180.71261596679688, "logps_train/policy_1_l": -166.12881469726562, "logps_train/policy_1_w": -121.3023681640625, "logps_train/policy_2_2": -138.3295135498047, "logps_train/policy_2_w": -166.35662841796875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.992314338684082, "rewards_train/1-l": -2.2312393188476562, "rewards_train/1-w": 3.3504273891448975, "rewards_train/2-2": 3.4696855545043945, "rewards_train/2-w": 1.4908990859985352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.581666707992554, "rewards_train/margins_1": 1.3581130504608154, "rewards_train/margins_2": 1.9787864685058594, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -125.86758422851562, "logps_train/policy_1_l": -120.78811645507812, "logps_train/policy_1_w": -106.31855773925781, "logps_train/policy_2_2": -101.63214111328125, "logps_train/policy_2_w": -143.22451782226562, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.8952739238739014, "rewards_train/1-l": -1.6577174663543701, "rewards_train/1-w": 3.123612880706787, "rewards_train/2-2": 2.8867859840393066, "rewards_train/2-w": 1.614266037940979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.781330347061157, "rewards_train/margins_1": 1.2283389568328857, "rewards_train/margins_2": 1.2725199460983276, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -190.26498413085938, "logps_train/policy_1_l": -207.15928649902344, "logps_train/policy_1_w": -155.95632934570312, "logps_train/policy_2_2": -148.86090087890625, "logps_train/policy_2_w": -195.89697265625, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.796938180923462, "rewards_train/1-l": -2.060460090637207, "rewards_train/1-w": 3.079367160797119, "rewards_train/2-2": 3.580317497253418, "rewards_train/2-w": 1.5524890422821045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.139827251434326, "rewards_train/margins_1": 1.2824289798736572, "rewards_train/margins_2": 2.0278284549713135, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -230.3341064453125, "logps_train/policy_1_l": -169.36529541015625, "logps_train/policy_1_w": -130.3850555419922, "logps_train/policy_2_2": -178.90245056152344, "logps_train/policy_2_w": -178.8476104736328, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.8290892839431763, "rewards_train/1-l": -1.8035212755203247, "rewards_train/1-w": 3.5818071365356445, "rewards_train/2-2": 3.676161289215088, "rewards_train/2-w": 1.8722703456878662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.385328412055969, "rewards_train/margins_1": 1.7527178525924683, "rewards_train/margins_2": 1.8038909435272217, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -204.91592407226562, "logps_train/policy_1_l": -107.74564361572266, "logps_train/policy_1_w": -67.71237182617188, "logps_train/policy_2_2": -159.856201171875, "logps_train/policy_2_w": -86.2667465209961, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.947470784187317, "rewards_train/1-l": -1.6667520999908447, "rewards_train/1-w": 2.777200698852539, "rewards_train/2-2": 4.063598155975342, "rewards_train/2-w": 2.0889503955841064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.443952798843384, "rewards_train/margins_1": 0.8297299146652222, "rewards_train/margins_2": 1.9746477603912354, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -142.46621704101562, "logps_train/policy_1_l": -164.0753173828125, "logps_train/policy_1_w": -116.18981170654297, "logps_train/policy_2_2": -112.59072875976562, "logps_train/policy_2_w": -156.08578491210938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.6627521514892578, "rewards_train/1-l": -1.3653444051742554, "rewards_train/1-w": 3.098987340927124, "rewards_train/2-2": 3.036240577697754, "rewards_train/2-w": 1.588295578956604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.464331746101379, "rewards_train/margins_1": 1.4362351894378662, "rewards_train/margins_2": 1.44794499874115, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -130.2248992919922, "logps_train/policy_1_l": -209.30267333984375, "logps_train/policy_1_w": -129.0262451171875, "logps_train/policy_2_2": -98.23139953613281, "logps_train/policy_2_w": -171.85983276367188, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.267352819442749, "rewards_train/1-l": -3.035736083984375, "rewards_train/1-w": 3.0661263465881348, "rewards_train/2-2": 2.8065474033355713, "rewards_train/2-w": 1.177298665046692, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.10186243057251, "rewards_train/margins_1": 1.7987735271453857, "rewards_train/margins_2": 1.6292487382888794, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -186.21453857421875, "logps_train/policy_1_l": -97.50366973876953, "logps_train/policy_1_w": -101.37260437011719, "logps_train/policy_2_2": -153.43545532226562, "logps_train/policy_2_w": -128.6873016357422, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.5293270349502563, "rewards_train/1-l": -1.6722416877746582, "rewards_train/1-w": 3.4580516815185547, "rewards_train/2-2": 2.8923912048339844, "rewards_train/2-w": 2.200019359588623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.130293369293213, "rewards_train/margins_1": 1.9287246465682983, "rewards_train/margins_2": 0.6923718452453613, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -203.396728515625, "logps_train/policy_1_l": -244.4688720703125, "logps_train/policy_1_w": -87.4178695678711, "logps_train/policy_2_2": -157.37110900878906, "logps_train/policy_2_w": -120.4560317993164, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.2431386709213257, "rewards_train/1-l": -3.4194459915161133, "rewards_train/1-w": 2.5550880432128906, "rewards_train/2-2": 3.151952028274536, "rewards_train/2-w": 1.5387721061706543, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.974534034729004, "rewards_train/margins_1": 1.311949372291565, "rewards_train/margins_2": 1.6131799221038818, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -85.70076751708984, "logps_train/policy_1_l": -124.71582794189453, "logps_train/policy_1_w": -33.63444900512695, "logps_train/policy_2_2": -66.96038055419922, "logps_train/policy_2_w": -43.056312561035156, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -51.5, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -56.5, "rewards_train/1-2": 1.295548439025879, "rewards_train/1-l": -2.650099039077759, "rewards_train/1-w": 1.7853829860687256, "rewards_train/2-2": 2.2898991107940674, "rewards_train/2-w": 1.3467128276824951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.435482025146484, "rewards_train/margins_1": 0.4898345470428467, "rewards_train/margins_2": 0.9431862831115723, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -163.1893310546875, "logps_train/policy_1_l": -178.57098388671875, "logps_train/policy_1_w": -150.4400634765625, "logps_train/policy_2_2": -130.70458984375, "logps_train/policy_2_w": -194.73358154296875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.5013798475265503, "rewards_train/1-l": -2.146160364151001, "rewards_train/1-w": 3.708336591720581, "rewards_train/2-2": 2.6506338119506836, "rewards_train/2-w": 1.4926587343215942, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.854496955871582, "rewards_train/margins_1": 2.2069567441940308, "rewards_train/margins_2": 1.1579750776290894, "step": 609 }, { "epoch": 1.83, "learning_rate": 1.0899753930869395e-07, "loss": 0.4203, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -208.16558837890625, "logps_train/policy_1_l": -257.1080322265625, "logps_train/policy_1_w": -157.6050567626953, "logps_train/policy_2_2": -158.25482177734375, "logps_train/policy_2_w": -194.8396453857422, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.508441686630249, "rewards_train/1-l": -2.092052936553955, "rewards_train/1-w": 3.331681251525879, "rewards_train/2-2": 3.3870186805725098, "rewards_train/2-w": 1.975409984588623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.423734188079834, "rewards_train/margins_1": 1.8232395648956299, "rewards_train/margins_2": 1.4116086959838867, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -116.27699279785156, "logps_train/policy_1_l": -133.5373992919922, "logps_train/policy_1_w": -91.05596923828125, "logps_train/policy_2_2": -78.12318420410156, "logps_train/policy_2_w": -132.94085693359375, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.661557674407959, "rewards_train/1-l": -0.10825619101524353, "rewards_train/1-w": 3.2215511798858643, "rewards_train/2-2": 2.634361743927002, "rewards_train/2-w": 1.2025933265686035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.329807370901108, "rewards_train/margins_1": 2.5599935054779053, "rewards_train/margins_2": 1.4317684173583984, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -100.71247863769531, "logps_train/policy_1_l": -91.07010650634766, "logps_train/policy_1_w": -76.05908966064453, "logps_train/policy_2_2": -78.83097076416016, "logps_train/policy_2_w": -113.17119598388672, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -74.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.2795329093933105, "rewards_train/1-l": -1.6640416383743286, "rewards_train/1-w": 2.4558095932006836, "rewards_train/2-2": 2.037996530532837, "rewards_train/2-w": 1.2158881425857544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.119851231575012, "rewards_train/margins_1": 1.176276683807373, "rewards_train/margins_2": 0.8221083879470825, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -126.06893920898438, "logps_train/policy_1_l": -169.82664489746094, "logps_train/policy_1_w": -135.6625518798828, "logps_train/policy_2_2": -95.30058288574219, "logps_train/policy_2_w": -163.43511962890625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.5821683406829834, "rewards_train/1-l": -2.0982890129089355, "rewards_train/1-w": 3.0970263481140137, "rewards_train/2-2": 2.8090038299560547, "rewards_train/2-w": 1.8541440963745117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.195315361022949, "rewards_train/margins_1": 1.5148580074310303, "rewards_train/margins_2": 0.954859733581543, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -120.9853515625, "logps_train/policy_1_l": -124.94024658203125, "logps_train/policy_1_w": -70.4063949584961, "logps_train/policy_2_2": -84.36719512939453, "logps_train/policy_2_w": -108.03927612304688, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 1.1745126247406006, "rewards_train/1-l": -1.5686333179473877, "rewards_train/1-w": 2.9156103134155273, "rewards_train/2-2": 2.696483850479126, "rewards_train/2-w": 1.0741969347000122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.484243631362915, "rewards_train/margins_1": 1.7410976886749268, "rewards_train/margins_2": 1.6222869157791138, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -151.2220916748047, "logps_train/policy_1_l": -230.4906005859375, "logps_train/policy_1_w": -183.18978881835938, "logps_train/policy_2_2": -114.61241912841797, "logps_train/policy_2_w": -232.84495544433594, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.3656818866729736, "rewards_train/1-l": -2.8418328762054443, "rewards_train/1-w": 3.4677388668060303, "rewards_train/2-2": 2.872742176055908, "rewards_train/2-w": 1.5014426708221436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.309571743011475, "rewards_train/margins_1": 2.1020569801330566, "rewards_train/margins_2": 1.3712995052337646, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -233.05853271484375, "logps_train/policy_1_l": -187.89364624023438, "logps_train/policy_1_w": -105.11729431152344, "logps_train/policy_2_2": -192.83700561523438, "logps_train/policy_2_w": -138.248291015625, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.6269599199295044, "rewards_train/1-l": -2.397177219390869, "rewards_train/1-w": 2.9191298484802246, "rewards_train/2-2": 3.2928621768951416, "rewards_train/2-w": 1.7298593521118164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.316307067871094, "rewards_train/margins_1": 1.2921699285507202, "rewards_train/margins_2": 1.5630028247833252, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -61.318450927734375, "logps_train/policy_1_l": -108.96199798583984, "logps_train/policy_1_w": -114.96113586425781, "logps_train/policy_2_2": -44.84278106689453, "logps_train/policy_2_w": -152.01412963867188, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -63.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.3880764245986938, "rewards_train/1-l": -1.1794028282165527, "rewards_train/1-w": 2.779667854309082, "rewards_train/2-2": 1.840721845626831, "rewards_train/2-w": 1.3571810722351074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9590706825256348, "rewards_train/margins_1": 1.3915914297103882, "rewards_train/margins_2": 0.48354077339172363, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -189.33705139160156, "logps_train/policy_1_l": -229.35858154296875, "logps_train/policy_1_w": -118.37060546875, "logps_train/policy_2_2": -149.5393524169922, "logps_train/policy_2_w": -151.79617309570312, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.8162940740585327, "rewards_train/1-l": -2.4124202728271484, "rewards_train/1-w": 3.009032726287842, "rewards_train/2-2": 3.364814281463623, "rewards_train/2-w": 1.9516327381134033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.42145299911499, "rewards_train/margins_1": 1.192738652229309, "rewards_train/margins_2": 1.4131815433502197, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -54.1292610168457, "logps_train/policy_1_l": -74.31437683105469, "logps_train/policy_1_w": -80.64139556884766, "logps_train/policy_2_2": -38.17665481567383, "logps_train/policy_2_w": -107.19001770019531, "logps_train/ref_1_2": -65.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -56.25, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": 1.0675426721572876, "rewards_train/1-l": -1.0962811708450317, "rewards_train/1-w": 2.7026572227478027, "rewards_train/2-2": 1.801866054534912, "rewards_train/2-w": 1.2583417892456055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.7989383935928345, "rewards_train/margins_1": 1.6351145505905151, "rewards_train/margins_2": 0.5435242652893066, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -148.291015625, "logps_train/policy_1_l": -163.5496826171875, "logps_train/policy_1_w": -91.3716049194336, "logps_train/policy_2_2": -111.47789764404297, "logps_train/policy_2_w": -120.01387023925781, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 1.455272912979126, "rewards_train/1-l": -2.2018423080444336, "rewards_train/1-w": 2.636277198791504, "rewards_train/2-2": 3.005335569381714, "rewards_train/2-w": 1.956425666809082, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.8381195068359375, "rewards_train/margins_1": 1.181004285812378, "rewards_train/margins_2": 1.0489099025726318, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -167.95681762695312, "logps_train/policy_1_l": -178.1655731201172, "logps_train/policy_1_w": -85.604248046875, "logps_train/policy_2_2": -127.4213638305664, "logps_train/policy_2_w": -113.54986572265625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.314870834350586, "rewards_train/1-l": -3.314598560333252, "rewards_train/1-w": 2.5266904830932617, "rewards_train/2-2": 3.274611473083496, "rewards_train/2-w": 1.533636212348938, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.841289043426514, "rewards_train/margins_1": 1.2118196487426758, "rewards_train/margins_2": 1.740975260734558, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -151.236328125, "logps_train/policy_1_l": -156.78094482421875, "logps_train/policy_1_w": -128.06417846679688, "logps_train/policy_2_2": -119.14529418945312, "logps_train/policy_2_w": -169.38967895507812, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.7294933795928955, "rewards_train/1-l": -1.8445003032684326, "rewards_train/1-w": 2.983426332473755, "rewards_train/2-2": 2.8964080810546875, "rewards_train/2-w": 1.5321260690689087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.8279266357421875, "rewards_train/margins_1": 1.2539329528808594, "rewards_train/margins_2": 1.3642820119857788, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -217.72628784179688, "logps_train/policy_1_l": -161.2869873046875, "logps_train/policy_1_w": -132.17257690429688, "logps_train/policy_2_2": -165.51998901367188, "logps_train/policy_2_w": -178.13861083984375, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.9664349555969238, "rewards_train/1-l": -2.08651065826416, "rewards_train/1-w": 3.479616641998291, "rewards_train/2-2": 4.226517677307129, "rewards_train/2-w": 1.6720772981643677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.566127300262451, "rewards_train/margins_1": 1.5131816864013672, "rewards_train/margins_2": 2.5544403791427612, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -92.36335754394531, "logps_train/policy_1_l": -55.8515739440918, "logps_train/policy_1_w": -58.85411071777344, "logps_train/policy_2_2": -64.3408203125, "logps_train/policy_2_w": -82.21198272705078, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -44.75, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -89.5, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 1.6480387449264526, "rewards_train/1-l": -1.1079113483428955, "rewards_train/1-w": 2.7473039627075195, "rewards_train/2-2": 2.5096683502197266, "rewards_train/2-w": 2.1217703819274902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.855215311050415, "rewards_train/margins_1": 1.099265217781067, "rewards_train/margins_2": 0.38789796829223633, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -191.32177734375, "logps_train/policy_1_l": -90.13591003417969, "logps_train/policy_1_w": -118.97073364257812, "logps_train/policy_2_2": -143.62051391601562, "logps_train/policy_2_w": -164.52279663085938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.365478277206421, "rewards_train/1-l": -1.2697432041168213, "rewards_train/1-w": 4.138864040374756, "rewards_train/2-2": 3.478769540786743, "rewards_train/2-w": 2.2242822647094727, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.408607244491577, "rewards_train/margins_1": 2.773385763168335, "rewards_train/margins_2": 1.2544872760772705, "step": 611 }, { "epoch": 1.83, "learning_rate": 1.0190110113823426e-07, "loss": 0.389, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -183.45440673828125, "logps_train/policy_1_l": -159.99559020996094, "logps_train/policy_1_w": -139.22476196289062, "logps_train/policy_2_2": -142.84930419921875, "logps_train/policy_2_w": -184.92523193359375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.2795600891113281, "rewards_train/1-l": -1.956786036491394, "rewards_train/1-w": 3.1951019763946533, "rewards_train/2-2": 2.9556941986083984, "rewards_train/2-w": 1.552788496017456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.151888012886047, "rewards_train/margins_1": 1.9155418872833252, "rewards_train/margins_2": 1.4029057025909424, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -106.510498046875, "logps_train/policy_1_l": -154.64364624023438, "logps_train/policy_1_w": -67.53092956542969, "logps_train/policy_2_2": -71.58097076416016, "logps_train/policy_2_w": -118.09150695800781, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 0.5809817910194397, "rewards_train/1-l": -3.295614719390869, "rewards_train/1-w": 2.514094114303589, "rewards_train/2-2": 2.139559268951416, "rewards_train/2-w": 0.8595990538597107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.809708833694458, "rewards_train/margins_1": 1.9331123232841492, "rewards_train/margins_2": 1.2799602150917053, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -159.7195281982422, "logps_train/policy_1_l": -197.51797485351562, "logps_train/policy_1_w": -159.87271118164062, "logps_train/policy_2_2": -122.56151580810547, "logps_train/policy_2_w": -202.916748046875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.628829002380371, "rewards_train/1-l": -2.3244543075561523, "rewards_train/1-w": 3.8939781188964844, "rewards_train/2-2": 2.9738287925720215, "rewards_train/2-w": 1.7051995992660522, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.218432426452637, "rewards_train/margins_1": 2.2651491165161133, "rewards_train/margins_2": 1.2686291933059692, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -134.54934692382812, "logps_train/policy_1_l": -97.73139953613281, "logps_train/policy_1_w": -105.3374252319336, "logps_train/policy_2_2": -96.64180755615234, "logps_train/policy_2_w": -151.06585693359375, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.5528775453567505, "rewards_train/1-l": -0.9275344014167786, "rewards_train/1-w": 2.9178202152252197, "rewards_train/2-2": 2.558475971221924, "rewards_train/2-w": 0.8678277134895325, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8453546166419983, "rewards_train/margins_1": 1.3649426698684692, "rewards_train/margins_2": 1.6906482577323914, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -91.70731353759766, "logps_train/policy_1_l": -116.32621765136719, "logps_train/policy_1_w": -78.09187316894531, "logps_train/policy_2_2": -78.44680786132812, "logps_train/policy_2_w": -91.1092529296875, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 1.8952842950820923, "rewards_train/1-l": -2.249027729034424, "rewards_train/1-w": 2.0048751831054688, "rewards_train/2-2": 2.3518033027648926, "rewards_train/2-w": 1.38751220703125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.253902912139893, "rewards_train/margins_1": 0.10959088802337646, "rewards_train/margins_2": 0.9642910957336426, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -250.44393920898438, "logps_train/policy_1_l": -175.0877685546875, "logps_train/policy_1_w": -132.53878784179688, "logps_train/policy_2_2": -189.91119384765625, "logps_train/policy_2_w": -177.3401336669922, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 1.0493556261062622, "rewards_train/1-l": -1.9033081531524658, "rewards_train/1-w": 3.0820581912994385, "rewards_train/2-2": 3.749504327774048, "rewards_train/2-w": 1.180830478668213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.985366344451904, "rewards_train/margins_1": 2.0327025651931763, "rewards_train/margins_2": 2.568673849105835, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -139.96356201171875, "logps_train/policy_1_l": -119.35320281982422, "logps_train/policy_1_w": -96.57453155517578, "logps_train/policy_2_2": -109.6097412109375, "logps_train/policy_2_w": -135.47332763671875, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 0.6509092450141907, "rewards_train/1-l": -1.5716484785079956, "rewards_train/1-w": 2.7409842014312744, "rewards_train/2-2": 1.8362915515899658, "rewards_train/2-w": 1.1698542833328247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.31263267993927, "rewards_train/margins_1": 2.0900749564170837, "rewards_train/margins_2": 0.6664372682571411, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -249.74697875976562, "logps_train/policy_1_l": -296.8978271484375, "logps_train/policy_1_w": -210.85655212402344, "logps_train/policy_2_2": -196.6485595703125, "logps_train/policy_2_w": -260.9665222167969, "logps_train/ref_1_2": -276.0, "logps_train/ref_1_l": -258.0, "logps_train/ref_1_w": -255.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -284.0, "rewards_train/1-2": 2.531550884246826, "rewards_train/1-l": -3.842127799987793, "rewards_train/1-w": 4.444032192230225, "rewards_train/2-2": 4.178894519805908, "rewards_train/2-w": 2.3111605644226074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.286159992218018, "rewards_train/margins_1": 1.9124813079833984, "rewards_train/margins_2": 1.8677339553833008, "step": 612 }, { "epoch": 1.84, "logps_train/policy_1_2": -102.91862487792969, "logps_train/policy_1_l": -112.32243347167969, "logps_train/policy_1_w": -106.08525848388672, "logps_train/policy_2_2": -79.37389373779297, "logps_train/policy_2_w": -141.99363708496094, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.3831374645233154, "rewards_train/1-l": -1.960954189300537, "rewards_train/1-w": 3.1414742469787598, "rewards_train/2-2": 2.49698543548584, "rewards_train/2-w": 1.5756359100341797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.102428436279297, "rewards_train/margins_1": 1.7583367824554443, "rewards_train/margins_2": 0.9213495254516602, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -254.71685791015625, "logps_train/policy_1_l": -168.5626220703125, "logps_train/policy_1_w": -188.68124389648438, "logps_train/policy_2_2": -212.92996215820312, "logps_train/policy_2_w": -225.1801300048828, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -231.0, "logps_train/ref_2_2": -256.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.568937301635742, "rewards_train/1-l": -1.6548950672149658, "rewards_train/1-w": 4.213126182556152, "rewards_train/2-2": 4.4624738693237305, "rewards_train/2-w": 2.3351120948791504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.868021249771118, "rewards_train/margins_1": 1.6441888809204102, "rewards_train/margins_2": 2.12736177444458, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -191.47705078125, "logps_train/policy_1_l": -249.151611328125, "logps_train/policy_1_w": -137.29928588867188, "logps_train/policy_2_2": -146.21441650390625, "logps_train/policy_2_w": -195.29978942871094, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.4929208755493164, "rewards_train/1-l": -3.8620355129241943, "rewards_train/1-w": 3.4778826236724854, "rewards_train/2-2": 3.297309398651123, "rewards_train/2-w": 1.6418968439102173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.33991813659668, "rewards_train/margins_1": 1.984961748123169, "rewards_train/margins_2": 1.6554125547409058, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -186.32284545898438, "logps_train/policy_1_l": -180.83314514160156, "logps_train/policy_1_w": -92.32706451416016, "logps_train/policy_2_2": -140.78713989257812, "logps_train/policy_2_w": -130.70574951171875, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.5278723239898682, "rewards_train/1-l": -1.7749526500701904, "rewards_train/1-w": 2.9337000846862793, "rewards_train/2-2": 3.0302700996398926, "rewards_train/2-w": 1.5235657691955566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.70865273475647, "rewards_train/margins_1": 1.4058277606964111, "rewards_train/margins_2": 1.506704330444336, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -106.59092712402344, "logps_train/policy_1_l": -120.7940673828125, "logps_train/policy_1_w": -69.17420196533203, "logps_train/policy_2_2": -89.42787170410156, "logps_train/policy_2_w": -78.50413513183594, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 1.046864628791809, "rewards_train/1-l": -0.7188595533370972, "rewards_train/1-w": 1.5535753965377808, "rewards_train/2-2": 1.928746223449707, "rewards_train/2-w": 0.9906514883041382, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.272434949874878, "rewards_train/margins_1": 0.5067107677459717, "rewards_train/margins_2": 0.9380947351455688, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -180.04502868652344, "logps_train/policy_1_l": -185.60736083984375, "logps_train/policy_1_w": -79.06273651123047, "logps_train/policy_2_2": -152.3754119873047, "logps_train/policy_2_w": -92.47782897949219, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 1.9556539058685303, "rewards_train/1-l": -2.744720458984375, "rewards_train/1-w": 2.3796634674072266, "rewards_train/2-2": 3.235506057739258, "rewards_train/2-w": 1.9738974571228027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.124383926391602, "rewards_train/margins_1": 0.4240095615386963, "rewards_train/margins_2": 1.261608600616455, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -62.93943786621094, "logps_train/policy_1_l": -130.1627960205078, "logps_train/policy_1_w": -68.11473846435547, "logps_train/policy_2_2": -44.18968963623047, "logps_train/policy_2_w": -95.64874267578125, "logps_train/ref_1_2": -68.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -90.5, "logps_train/ref_2_2": -58.5, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": 0.487989604473114, "rewards_train/1-l": -2.1446003913879395, "rewards_train/1-w": 2.22836971282959, "rewards_train/2-2": 1.4076910018920898, "rewards_train/2-w": 0.8929383754730225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.372970104217529, "rewards_train/margins_1": 1.7403801083564758, "rewards_train/margins_2": 0.5147526264190674, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -91.40232849121094, "logps_train/policy_1_l": -80.54879760742188, "logps_train/policy_1_w": -62.745399475097656, "logps_train/policy_2_2": -73.91854858398438, "logps_train/policy_2_w": -88.10072326660156, "logps_train/ref_1_2": -106.0, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 1.4259777069091797, "rewards_train/1-l": -1.3091766834259033, "rewards_train/1-w": 3.2715535163879395, "rewards_train/2-2": 2.2009193897247314, "rewards_train/2-w": 2.1930530071258545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.580730199813843, "rewards_train/margins_1": 1.8455758094787598, "rewards_train/margins_2": 0.007866382598876953, "step": 613 }, { "epoch": 1.84, "learning_rate": 9.503871319271552e-08, "loss": 0.4267, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -107.17872619628906, "logps_train/policy_1_l": -99.58922576904297, "logps_train/policy_1_w": -63.76190185546875, "logps_train/policy_2_2": -89.94320678710938, "logps_train/policy_2_w": -84.63560485839844, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -101.0, "rewards_train/1-2": 1.1772444248199463, "rewards_train/1-l": -1.422203779220581, "rewards_train/1-w": 2.8050599098205566, "rewards_train/2-2": 1.914272665977478, "rewards_train/2-w": 1.660658359527588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.227263689041138, "rewards_train/margins_1": 1.6278154850006104, "rewards_train/margins_2": 0.25361430644989014, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -211.85870361328125, "logps_train/policy_1_l": -172.04681396484375, "logps_train/policy_1_w": -164.33291625976562, "logps_train/policy_2_2": -171.07859802246094, "logps_train/policy_2_w": -204.16981506347656, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -203.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.527411699295044, "rewards_train/1-l": -1.6394463777542114, "rewards_train/1-w": 3.3729586601257324, "rewards_train/2-2": 3.1425318717956543, "rewards_train/2-w": 2.2486441135406494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.012405037879944, "rewards_train/margins_1": 1.8455469608306885, "rewards_train/margins_2": 0.8938877582550049, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -117.38911437988281, "logps_train/policy_1_l": -145.38320922851562, "logps_train/policy_1_w": -89.08680725097656, "logps_train/policy_2_2": -85.54427337646484, "logps_train/policy_2_w": -120.60651397705078, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.4661675691604614, "rewards_train/1-l": -1.818593978881836, "rewards_train/1-w": 2.404405117034912, "rewards_train/2-2": 2.4893229007720947, "rewards_train/2-w": 1.565716028213501, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.222999095916748, "rewards_train/margins_1": 0.9382375478744507, "rewards_train/margins_2": 0.9236068725585938, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -78.56314849853516, "logps_train/policy_1_l": -67.73020935058594, "logps_train/policy_1_w": -45.507110595703125, "logps_train/policy_2_2": -58.59191131591797, "logps_train/policy_2_w": -61.39374923706055, "logps_train/ref_1_2": -91.0, "logps_train/ref_1_l": -42.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -74.5, "rewards_train/1-2": 1.2437829971313477, "rewards_train/1-l": -2.510129928588867, "rewards_train/1-w": 2.137179374694824, "rewards_train/2-2": 2.145007848739624, "rewards_train/2-w": 1.3102344274520874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.647309303283691, "rewards_train/margins_1": 0.8933963775634766, "rewards_train/margins_2": 0.8347734212875366, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -126.59026336669922, "logps_train/policy_1_l": -137.67979431152344, "logps_train/policy_1_w": -65.13240051269531, "logps_train/policy_2_2": -104.92488098144531, "logps_train/policy_2_w": -96.99911499023438, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 0.8827706575393677, "rewards_train/1-l": -2.301720142364502, "rewards_train/1-w": 2.2019944190979004, "rewards_train/2-2": 1.8961836099624634, "rewards_train/2-w": 1.273721694946289, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.503714561462402, "rewards_train/margins_1": 1.3192237615585327, "rewards_train/margins_2": 0.6224619150161743, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -194.3168182373047, "logps_train/policy_1_l": -191.21282958984375, "logps_train/policy_1_w": -101.5919189453125, "logps_train/policy_2_2": -146.12942504882812, "logps_train/policy_2_w": -140.42538452148438, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 0.6886307597160339, "rewards_train/1-l": -2.4955012798309326, "rewards_train/1-w": 2.2001829147338867, "rewards_train/2-2": 2.9847147464752197, "rewards_train/2-w": 0.9371493458747864, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.695684194564819, "rewards_train/margins_1": 1.5115521550178528, "rewards_train/margins_2": 2.0475654006004333, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -90.38272094726562, "logps_train/policy_1_l": -70.72612762451172, "logps_train/policy_1_w": -58.441490173339844, "logps_train/policy_2_2": -61.46571350097656, "logps_train/policy_2_w": -99.5064926147461, "logps_train/ref_1_2": -97.0, "logps_train/ref_1_l": -56.25, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 0.6640709638595581, "rewards_train/1-l": -1.456011414527893, "rewards_train/1-w": 2.234757661819458, "rewards_train/2-2": 1.678819179534912, "rewards_train/2-w": 0.6040375232696533, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.690769076347351, "rewards_train/margins_1": 1.5706866979599, "rewards_train/margins_2": 1.0747816562652588, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -162.03903198242188, "logps_train/policy_1_l": -83.78150177001953, "logps_train/policy_1_w": -97.64490509033203, "logps_train/policy_2_2": -125.6309814453125, "logps_train/policy_2_w": -126.55630493164062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 0.9925820827484131, "rewards_train/1-l": -1.171069860458374, "rewards_train/1-w": 2.5882439613342285, "rewards_train/2-2": 2.948230504989624, "rewards_train/2-w": 1.2006193399429321, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7593138217926025, "rewards_train/margins_1": 1.5956618785858154, "rewards_train/margins_2": 1.747611165046692, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -133.63638305664062, "logps_train/policy_1_l": -193.2489013671875, "logps_train/policy_1_w": -102.04914855957031, "logps_train/policy_2_2": -96.22899627685547, "logps_train/policy_2_w": -147.20729064941406, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.684800148010254, "rewards_train/1-l": -2.6983273029327393, "rewards_train/1-w": 3.1382486820220947, "rewards_train/2-2": 3.070850372314453, "rewards_train/2-w": 1.5698964595794678, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.836575984954834, "rewards_train/margins_1": 1.4534485340118408, "rewards_train/margins_2": 1.5009539127349854, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -183.34310913085938, "logps_train/policy_1_l": -221.76577758789062, "logps_train/policy_1_w": -183.05935668945312, "logps_train/policy_2_2": -149.80319213867188, "logps_train/policy_2_w": -231.26290893554688, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.6414716243743896, "rewards_train/1-l": -2.404116153717041, "rewards_train/1-w": 4.586251258850098, "rewards_train/2-2": 3.7192914485931396, "rewards_train/2-w": 2.3018345832824707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.990367412567139, "rewards_train/margins_1": 1.944779634475708, "rewards_train/margins_2": 1.417456865310669, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -97.73808288574219, "logps_train/policy_1_l": -80.28848266601562, "logps_train/policy_1_w": -60.58659744262695, "logps_train/policy_2_2": -84.4295654296875, "logps_train/policy_2_w": -91.49050903320312, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -61.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.5191603899002075, "rewards_train/1-l": -1.918691635131836, "rewards_train/1-w": 2.6262526512145996, "rewards_train/2-2": 2.0894651412963867, "rewards_train/2-w": 1.5376677513122559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5449442863464355, "rewards_train/margins_1": 1.107092261314392, "rewards_train/margins_2": 0.5517973899841309, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -86.59759521484375, "logps_train/policy_1_l": -116.88096618652344, "logps_train/policy_1_w": -108.61565399169922, "logps_train/policy_2_2": -61.17793655395508, "logps_train/policy_2_w": -145.98422241210938, "logps_train/ref_1_2": -96.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.897271990776062, "rewards_train/1-l": -2.0216898918151855, "rewards_train/1-w": 2.454841136932373, "rewards_train/2-2": 2.1271283626556396, "rewards_train/2-w": 0.5672023892402649, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.476531028747559, "rewards_train/margins_1": 1.557569146156311, "rewards_train/margins_2": 1.5599259734153748, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -259.2226257324219, "logps_train/policy_1_l": -225.8004150390625, "logps_train/policy_1_w": -154.67820739746094, "logps_train/policy_2_2": -196.52835083007812, "logps_train/policy_2_w": -209.9683074951172, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -240.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": 1.3105510473251343, "rewards_train/1-l": -2.3003530502319336, "rewards_train/1-w": 4.0415544509887695, "rewards_train/2-2": 4.367476940155029, "rewards_train/2-w": 1.828169345855713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.341907501220703, "rewards_train/margins_1": 2.7310034036636353, "rewards_train/margins_2": 2.5393075942993164, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -85.66032409667969, "logps_train/policy_1_l": -91.50209045410156, "logps_train/policy_1_w": -68.42741394042969, "logps_train/policy_2_2": -62.723236083984375, "logps_train/policy_2_w": -90.86671447753906, "logps_train/ref_1_2": -101.5, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -104.5, "rewards_train/1-2": 1.5620927810668945, "rewards_train/1-l": -1.3543105125427246, "rewards_train/1-w": 2.339289903640747, "rewards_train/2-2": 2.233145236968994, "rewards_train/2-w": 1.3637198209762573, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6936004161834717, "rewards_train/margins_1": 0.7771971225738525, "rewards_train/margins_2": 0.8694254159927368, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -125.42733764648438, "logps_train/policy_1_l": -108.00725555419922, "logps_train/policy_1_w": -118.72999572753906, "logps_train/policy_2_2": -87.0142822265625, "logps_train/policy_2_w": -163.18557739257812, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.2357819080352783, "rewards_train/1-l": -1.6799250841140747, "rewards_train/1-w": 3.2836408615112305, "rewards_train/2-2": 3.0841188430786133, "rewards_train/2-w": 1.3462858200073242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.963565945625305, "rewards_train/margins_1": 2.047858953475952, "rewards_train/margins_2": 1.737833023071289, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -150.7056884765625, "logps_train/policy_1_l": -193.78916931152344, "logps_train/policy_1_w": -94.84760284423828, "logps_train/policy_2_2": -113.21509552001953, "logps_train/policy_2_w": -137.4915771484375, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.2294304370880127, "rewards_train/1-l": -3.975792169570923, "rewards_train/1-w": 2.751567840576172, "rewards_train/2-2": 2.644115447998047, "rewards_train/2-w": 1.55865478515625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.727360010147095, "rewards_train/margins_1": 1.5221374034881592, "rewards_train/margins_2": 1.0854606628417969, "step": 615 }, { "epoch": 1.84, "learning_rate": 8.841104522910343e-08, "loss": 0.4511, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -175.556640625, "logps_train/policy_1_l": -202.1513214111328, "logps_train/policy_1_w": -160.4834747314453, "logps_train/policy_2_2": -138.76934814453125, "logps_train/policy_2_w": -206.40367126464844, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.716212272644043, "rewards_train/1-l": -2.137007236480713, "rewards_train/1-w": 3.786027193069458, "rewards_train/2-2": 2.807440757751465, "rewards_train/2-w": 1.7330708503723145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.923034429550171, "rewards_train/margins_1": 2.069814920425415, "rewards_train/margins_2": 1.0743699073791504, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -129.62841796875, "logps_train/policy_1_l": -111.04317474365234, "logps_train/policy_1_w": -50.51906204223633, "logps_train/policy_2_2": -90.03466796875, "logps_train/policy_2_w": -84.48188781738281, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -70.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 1.105907678604126, "rewards_train/1-l": -1.7690633535385132, "rewards_train/1-w": 2.0035624504089355, "rewards_train/2-2": 2.8902835845947266, "rewards_train/2-w": 0.8674367070198059, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7726258039474487, "rewards_train/margins_1": 0.8976547718048096, "rewards_train/margins_2": 2.0228468775749207, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -184.88662719726562, "logps_train/policy_1_l": -161.28363037109375, "logps_train/policy_1_w": -169.05760192871094, "logps_train/policy_2_2": -136.72117614746094, "logps_train/policy_2_w": -222.15432739257812, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.0347745418548584, "rewards_train/1-l": -1.4660592079162598, "rewards_train/1-w": 3.4012718200683594, "rewards_train/2-2": 2.9524922370910645, "rewards_train/2-w": 1.5814416408538818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.867331027984619, "rewards_train/margins_1": 2.366497278213501, "rewards_train/margins_2": 1.3710505962371826, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -137.41734313964844, "logps_train/policy_1_l": -114.36042785644531, "logps_train/policy_1_w": -74.04598236083984, "logps_train/policy_2_2": -109.42739868164062, "logps_train/policy_2_w": -89.44884490966797, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 1.4684221744537354, "rewards_train/1-l": -1.7692458629608154, "rewards_train/1-w": 2.671964406967163, "rewards_train/2-2": 2.8080408573150635, "rewards_train/2-w": 1.9898808002471924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.4412102699279785, "rewards_train/margins_1": 1.2035422325134277, "rewards_train/margins_2": 0.8181600570678711, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -189.44488525390625, "logps_train/policy_1_l": -140.33499145507812, "logps_train/policy_1_w": -128.59054565429688, "logps_train/policy_2_2": -132.73548889160156, "logps_train/policy_2_w": -168.16983032226562, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.440669059753418, "rewards_train/1-l": -1.9182649850845337, "rewards_train/1-w": 3.388307571411133, "rewards_train/2-2": 3.6428565979003906, "rewards_train/2-w": 1.3533297777175903, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.3065725564956665, "rewards_train/margins_1": 1.9476385116577148, "rewards_train/margins_2": 2.2895268201828003, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -117.23027801513672, "logps_train/policy_1_l": -145.53636169433594, "logps_train/policy_1_w": -98.391357421875, "logps_train/policy_2_2": -93.70503234863281, "logps_train/policy_2_w": -122.8812255859375, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.7773630619049072, "rewards_train/1-l": -2.5618393421173096, "rewards_train/1-w": 3.3626222610473633, "rewards_train/2-2": 1.6283248662948608, "rewards_train/2-w": 2.22476863861084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.924461603164673, "rewards_train/margins_1": 2.585259199142456, "rewards_train/margins_2": -0.596443772315979, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -139.77374267578125, "logps_train/policy_1_l": -132.34922790527344, "logps_train/policy_1_w": -43.3437385559082, "logps_train/policy_2_2": -110.09369659423828, "logps_train/policy_2_w": -68.33790588378906, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -62.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": 1.0109070539474487, "rewards_train/1-l": -2.14547061920166, "rewards_train/1-w": 1.8507822751998901, "rewards_train/2-2": 2.500396251678467, "rewards_train/2-w": 1.0837873220443726, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9962528944015503, "rewards_train/margins_1": 0.8398752212524414, "rewards_train/margins_2": 1.4166089296340942, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -131.22512817382812, "logps_train/policy_1_l": -156.2714385986328, "logps_train/policy_1_w": -52.75532913208008, "logps_train/policy_2_2": -97.24373626708984, "logps_train/policy_2_w": -78.99807739257812, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -91.0, "rewards_train/1-2": 0.8622527718544006, "rewards_train/1-l": -2.2886672019958496, "rewards_train/1-w": 1.8659710884094238, "rewards_train/2-2": 2.177872657775879, "rewards_train/2-w": 1.1582980155944824, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.154638290405273, "rewards_train/margins_1": 1.0037183165550232, "rewards_train/margins_2": 1.0195746421813965, "step": 616 }, { "epoch": 1.85, "logps_train/policy_1_2": -223.94140625, "logps_train/policy_1_l": -167.64154052734375, "logps_train/policy_1_w": -85.54444885253906, "logps_train/policy_2_2": -184.95263671875, "logps_train/policy_2_w": -112.51361846923828, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.8167965412139893, "rewards_train/1-l": -2.7516558170318604, "rewards_train/1-w": 3.029148578643799, "rewards_train/2-2": 3.760204553604126, "rewards_train/2-w": 1.9837944507598877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.780804395675659, "rewards_train/margins_1": 1.2123520374298096, "rewards_train/margins_2": 1.7764101028442383, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -127.51534271240234, "logps_train/policy_1_l": -119.20321655273438, "logps_train/policy_1_w": -88.74186706542969, "logps_train/policy_2_2": -103.05640411376953, "logps_train/policy_2_w": -104.74812316894531, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.9797155857086182, "rewards_train/1-l": -1.8694918155670166, "rewards_train/1-w": 2.986751079559326, "rewards_train/2-2": 2.4943594932556152, "rewards_train/2-w": 2.042374849319458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.856242895126343, "rewards_train/margins_1": 1.007035493850708, "rewards_train/margins_2": 0.4519846439361572, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -239.66561889648438, "logps_train/policy_1_l": -293.69586181640625, "logps_train/policy_1_w": -125.05293273925781, "logps_train/policy_2_2": -188.64492797851562, "logps_train/policy_2_w": -162.62942504882812, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -258.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.8365626335144043, "rewards_train/1-l": -3.6008377075195312, "rewards_train/1-w": 3.5243935585021973, "rewards_train/2-2": 3.5159752368927, "rewards_train/2-w": 2.52612042427063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.1252312660217285, "rewards_train/margins_1": 1.687830924987793, "rewards_train/margins_2": 0.9898548126220703, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -142.29649353027344, "logps_train/policy_1_l": -124.33273315429688, "logps_train/policy_1_w": -115.33627319335938, "logps_train/policy_2_2": -109.64575958251953, "logps_train/policy_2_w": -154.63076782226562, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.8566782474517822, "rewards_train/1-l": -1.5100312232971191, "rewards_train/1-w": 3.3991851806640625, "rewards_train/2-2": 3.3065178394317627, "rewards_train/2-w": 1.8021583557128906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.909216403961182, "rewards_train/margins_1": 1.5425069332122803, "rewards_train/margins_2": 1.504359483718872, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -86.51898193359375, "logps_train/policy_1_l": -159.82273864746094, "logps_train/policy_1_w": -143.47109985351562, "logps_train/policy_2_2": -64.63241577148438, "logps_train/policy_2_w": -192.16558837890625, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.6356019973754883, "rewards_train/1-l": -1.658348560333252, "rewards_train/1-w": 2.7513270378112793, "rewards_train/2-2": 2.430508613586426, "rewards_train/2-w": 0.7803168296813965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.409675598144531, "rewards_train/margins_1": 1.115725040435791, "rewards_train/margins_2": 1.6501917839050293, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -146.215576171875, "logps_train/policy_1_l": -143.70057678222656, "logps_train/policy_1_w": -127.89865112304688, "logps_train/policy_2_2": -119.26823425292969, "logps_train/policy_2_w": -167.88882446289062, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.209888458251953, "rewards_train/1-l": -1.9020886421203613, "rewards_train/1-w": 3.658571720123291, "rewards_train/2-2": 3.450544595718384, "rewards_train/2-w": 2.0017430782318115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.560660362243652, "rewards_train/margins_1": 1.448683261871338, "rewards_train/margins_2": 1.4488015174865723, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -256.9305419921875, "logps_train/policy_1_l": -322.61285400390625, "logps_train/policy_1_w": -182.292236328125, "logps_train/policy_2_2": -197.9292755126953, "logps_train/policy_2_w": -241.5706329345703, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -274.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": 1.494444489479065, "rewards_train/1-l": -4.8886308670043945, "rewards_train/1-w": 4.18952751159668, "rewards_train/2-2": 4.357071876525879, "rewards_train/2-w": 1.774186134338379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 9.078158378601074, "rewards_train/margins_1": 2.6950830221176147, "rewards_train/margins_2": 2.5828857421875, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -76.6797866821289, "logps_train/policy_1_l": -140.9725799560547, "logps_train/policy_1_w": -99.33311462402344, "logps_train/policy_2_2": -63.0982666015625, "logps_train/policy_2_w": -115.56364440917969, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.9476460814476013, "rewards_train/1-l": -1.9083912372589111, "rewards_train/1-w": 2.185438632965088, "rewards_train/2-2": 1.6761109828948975, "rewards_train/2-w": 1.2483233213424683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.093829870223999, "rewards_train/margins_1": 1.2377925515174866, "rewards_train/margins_2": 0.4277876615524292, "step": 617 }, { "epoch": 1.85, "learning_rate": 8.201874409610732e-08, "loss": 0.4588, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -172.72390747070312, "logps_train/policy_1_l": -173.8029327392578, "logps_train/policy_1_w": -140.13763427734375, "logps_train/policy_2_2": -139.50616455078125, "logps_train/policy_2_w": -179.0013427734375, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 1.766671895980835, "rewards_train/1-l": -2.041377305984497, "rewards_train/1-w": 3.1032769680023193, "rewards_train/2-2": 2.9376659393310547, "rewards_train/2-w": 1.4596319198608398, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.144654273986816, "rewards_train/margins_1": 1.3366050720214844, "rewards_train/margins_2": 1.4780340194702148, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -153.9073486328125, "logps_train/policy_1_l": -188.17294311523438, "logps_train/policy_1_w": -125.55886840820312, "logps_train/policy_2_2": -111.46027374267578, "logps_train/policy_2_w": -182.51495361328125, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.3991081714630127, "rewards_train/1-l": -2.415292739868164, "rewards_train/1-w": 3.10817551612854, "rewards_train/2-2": 3.0383479595184326, "rewards_train/2-w": 1.2844420671463013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.523468255996704, "rewards_train/margins_1": 1.7090673446655273, "rewards_train/margins_2": 1.7539058923721313, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -120.5329818725586, "logps_train/policy_1_l": -140.02357482910156, "logps_train/policy_1_w": -64.47842407226562, "logps_train/policy_2_2": -89.01280212402344, "logps_train/policy_2_w": -94.48052978515625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -93.5, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.1217020750045776, "rewards_train/1-l": -2.542200803756714, "rewards_train/1-w": 2.8951258659362793, "rewards_train/2-2": 2.631141185760498, "rewards_train/2-w": 1.6355406045913696, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.437326669692993, "rewards_train/margins_1": 1.7734237909317017, "rewards_train/margins_2": 0.9956005811691284, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -162.5177001953125, "logps_train/policy_1_l": -120.51631164550781, "logps_train/policy_1_w": -97.22843933105469, "logps_train/policy_2_2": -114.64945983886719, "logps_train/policy_2_w": -139.86656188964844, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.1841667890548706, "rewards_train/1-l": -2.401240110397339, "rewards_train/1-w": 3.1037187576293945, "rewards_train/2-2": 3.212397813796997, "rewards_train/2-w": 1.208657145500183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.504958868026733, "rewards_train/margins_1": 1.919551968574524, "rewards_train/margins_2": 2.003740668296814, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -195.03555297851562, "logps_train/policy_1_l": -198.34201049804688, "logps_train/policy_1_w": -184.29159545898438, "logps_train/policy_2_2": -155.44244384765625, "logps_train/policy_2_w": -234.57427978515625, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -224.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.7026941776275635, "rewards_train/1-l": -2.679513454437256, "rewards_train/1-w": 3.992715835571289, "rewards_train/2-2": 3.415131092071533, "rewards_train/2-w": 1.9425718784332275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.672229290008545, "rewards_train/margins_1": 2.2900216579437256, "rewards_train/margins_2": 1.4725592136383057, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -141.9605712890625, "logps_train/policy_1_l": -197.36294555664062, "logps_train/policy_1_w": -147.83969116210938, "logps_train/policy_2_2": -111.6920166015625, "logps_train/policy_2_w": -189.45367431640625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 1.8289425373077393, "rewards_train/1-l": -2.6212542057037354, "rewards_train/1-w": 3.602358102798462, "rewards_train/2-2": 2.8296260833740234, "rewards_train/2-w": 1.9308042526245117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.223612308502197, "rewards_train/margins_1": 1.7734155654907227, "rewards_train/margins_2": 0.8988218307495117, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -158.06124877929688, "logps_train/policy_1_l": -191.28359985351562, "logps_train/policy_1_w": -132.2384490966797, "logps_train/policy_2_2": -103.07630157470703, "logps_train/policy_2_w": -183.81924438476562, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.275905728340149, "rewards_train/1-l": -1.355411410331726, "rewards_train/1-w": 3.2264487743377686, "rewards_train/2-2": 3.147838592529297, "rewards_train/2-w": 1.1126058101654053, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.581860184669495, "rewards_train/margins_1": 1.9505430459976196, "rewards_train/margins_2": 2.0352327823638916, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -140.35508728027344, "logps_train/policy_1_l": -118.46337127685547, "logps_train/policy_1_w": -148.404296875, "logps_train/policy_2_2": -117.09733581542969, "logps_train/policy_2_w": -171.3827362060547, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.7363667488098145, "rewards_train/1-l": -0.9827626347541809, "rewards_train/1-w": 3.5095701217651367, "rewards_train/2-2": 2.5340166091918945, "rewards_train/2-w": 2.424226760864258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.492332756519318, "rewards_train/margins_1": 1.7732033729553223, "rewards_train/margins_2": 0.10978984832763672, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -207.79559326171875, "logps_train/policy_1_l": -219.64389038085938, "logps_train/policy_1_w": -135.06564331054688, "logps_train/policy_2_2": -163.72607421875, "logps_train/policy_2_w": -164.0570831298828, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.1782548427581787, "rewards_train/1-l": -2.620248317718506, "rewards_train/1-w": 2.7311315536499023, "rewards_train/2-2": 3.3742685317993164, "rewards_train/2-w": 1.7161662578582764, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.351379871368408, "rewards_train/margins_1": 1.5528767108917236, "rewards_train/margins_2": 1.65810227394104, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -223.03585815429688, "logps_train/policy_1_l": -260.76068115234375, "logps_train/policy_1_w": -136.59323120117188, "logps_train/policy_2_2": -189.26966857910156, "logps_train/policy_2_w": -171.05059814453125, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.327665328979492, "rewards_train/1-l": -2.9838807582855225, "rewards_train/1-w": 3.66255259513855, "rewards_train/2-2": 3.9355335235595703, "rewards_train/2-w": 2.548064947128296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.646433353424072, "rewards_train/margins_1": 1.3348872661590576, "rewards_train/margins_2": 1.3874685764312744, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -115.94193267822266, "logps_train/policy_1_l": -179.8811492919922, "logps_train/policy_1_w": -101.78303527832031, "logps_train/policy_2_2": -87.94573974609375, "logps_train/policy_2_w": -139.2974090576172, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.294869303703308, "rewards_train/1-l": -2.1150686740875244, "rewards_train/1-w": 2.6791181564331055, "rewards_train/2-2": 2.4476141929626465, "rewards_train/2-w": 1.2374467849731445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.79418683052063, "rewards_train/margins_1": 1.3842488527297974, "rewards_train/margins_2": 1.210167407989502, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -135.74850463867188, "logps_train/policy_1_l": -170.32113647460938, "logps_train/policy_1_w": -130.55453491210938, "logps_train/policy_2_2": -103.463623046875, "logps_train/policy_2_w": -169.74945068359375, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.1595258712768555, "rewards_train/1-l": -2.3910980224609375, "rewards_train/1-w": 3.319643974304199, "rewards_train/2-2": 3.2630128860473633, "rewards_train/2-w": 1.817535638809204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.710741996765137, "rewards_train/margins_1": 1.1601181030273438, "rewards_train/margins_2": 1.4454772472381592, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -178.16018676757812, "logps_train/policy_1_l": -203.26548767089844, "logps_train/policy_1_w": -199.07818603515625, "logps_train/policy_2_2": -149.71481323242188, "logps_train/policy_2_w": -221.50686645507812, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -232.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 3.3589818477630615, "rewards_train/1-l": -2.3452982902526855, "rewards_train/1-w": 3.3212833404541016, "rewards_train/2-2": 4.220706462860107, "rewards_train/2-w": 2.207906723022461, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.666581630706787, "rewards_train/margins_1": -0.03769850730895996, "rewards_train/margins_2": 2.0127997398376465, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -69.24095916748047, "logps_train/policy_1_l": -167.9332275390625, "logps_train/policy_1_w": -92.29269409179688, "logps_train/policy_2_2": -51.846168518066406, "logps_train/policy_2_w": -132.53968811035156, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.111060619354248, "rewards_train/1-l": -2.517540693283081, "rewards_train/1-w": 2.212918758392334, "rewards_train/2-2": 1.7274926900863647, "rewards_train/2-w": 0.6897810697555542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.730459451675415, "rewards_train/margins_1": 1.101858139038086, "rewards_train/margins_2": 1.0377116203308105, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -134.16253662109375, "logps_train/policy_1_l": -65.42877197265625, "logps_train/policy_1_w": -93.97592163085938, "logps_train/policy_2_2": -93.06742858886719, "logps_train/policy_2_w": -138.52572631835938, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.15562105178833, "rewards_train/1-l": -1.112213373184204, "rewards_train/1-w": 2.9473299980163574, "rewards_train/2-2": 2.9854445457458496, "rewards_train/2-w": 0.8091467618942261, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0595433712005615, "rewards_train/margins_1": 1.7917089462280273, "rewards_train/margins_2": 2.1762977838516235, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -145.513916015625, "logps_train/policy_1_l": -174.04856872558594, "logps_train/policy_1_w": -116.40628051757812, "logps_train/policy_2_2": -118.33767700195312, "logps_train/policy_2_w": -143.61199951171875, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.7814210653305054, "rewards_train/1-l": -2.2329819202423096, "rewards_train/1-w": 3.732028007507324, "rewards_train/2-2": 2.8256077766418457, "rewards_train/2-w": 2.6747379302978516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.965009927749634, "rewards_train/margins_1": 1.9506069421768188, "rewards_train/margins_2": 0.15086984634399414, "step": 619 }, { "epoch": 1.86, "learning_rate": 7.586243367104895e-08, "loss": 0.3915, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -75.3148193359375, "logps_train/policy_1_l": -112.02449035644531, "logps_train/policy_1_w": -84.24018096923828, "logps_train/policy_2_2": -46.594757080078125, "logps_train/policy_2_w": -122.30665588378906, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.949768602848053, "rewards_train/1-l": -2.749518871307373, "rewards_train/1-w": 2.54771089553833, "rewards_train/2-2": 2.1038055419921875, "rewards_train/2-w": 1.305272102355957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.297229766845703, "rewards_train/margins_1": 1.597942292690277, "rewards_train/margins_2": 0.7985334396362305, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -65.24120330810547, "logps_train/policy_1_l": -129.61021423339844, "logps_train/policy_1_w": -61.18449401855469, "logps_train/policy_2_2": -53.099769592285156, "logps_train/policy_2_w": -81.54540252685547, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.475293517112732, "rewards_train/1-l": -2.7854347229003906, "rewards_train/1-w": 2.6354565620422363, "rewards_train/2-2": 2.093538522720337, "rewards_train/2-w": 1.5252447128295898, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.420891284942627, "rewards_train/margins_1": 1.1601630449295044, "rewards_train/margins_2": 0.5682938098907471, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -187.92715454101562, "logps_train/policy_1_l": -169.08004760742188, "logps_train/policy_1_w": -169.66104125976562, "logps_train/policy_2_2": -155.17330932617188, "logps_train/policy_2_w": -198.6417236328125, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.8932231664657593, "rewards_train/1-l": -1.6134731769561768, "rewards_train/1-w": 4.025300979614258, "rewards_train/2-2": 3.449075222015381, "rewards_train/2-w": 2.6108269691467285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.638774156570435, "rewards_train/margins_1": 2.1320778131484985, "rewards_train/margins_2": 0.8382482528686523, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -186.54669189453125, "logps_train/policy_1_l": -210.5283203125, "logps_train/policy_1_w": -146.42343139648438, "logps_train/policy_2_2": -132.45379638671875, "logps_train/policy_2_w": -192.59906005859375, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": 1.4953311681747437, "rewards_train/1-l": -1.8434557914733887, "rewards_train/1-w": 2.9795327186584473, "rewards_train/2-2": 3.517120361328125, "rewards_train/2-w": 1.4525938034057617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.822988510131836, "rewards_train/margins_1": 1.4842015504837036, "rewards_train/margins_2": 2.0645265579223633, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -117.21913146972656, "logps_train/policy_1_l": -134.77383422851562, "logps_train/policy_1_w": -95.65151977539062, "logps_train/policy_2_2": -85.29360961914062, "logps_train/policy_2_w": -128.1650848388672, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 0.8882431387901306, "rewards_train/1-l": -2.5547266006469727, "rewards_train/1-w": 2.3950037956237793, "rewards_train/2-2": 2.2253265380859375, "rewards_train/2-w": 0.7491159439086914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.949730396270752, "rewards_train/margins_1": 1.5067606568336487, "rewards_train/margins_2": 1.476210594177246, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -156.73008728027344, "logps_train/policy_1_l": -157.88470458984375, "logps_train/policy_1_w": -119.56212615966797, "logps_train/policy_2_2": -118.41343688964844, "logps_train/policy_2_w": -169.1990966796875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.0519912242889404, "rewards_train/1-l": -1.4994077682495117, "rewards_train/1-w": 4.171913146972656, "rewards_train/2-2": 3.7149055004119873, "rewards_train/2-w": 2.1050896644592285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.671320915222168, "rewards_train/margins_1": 2.119921922683716, "rewards_train/margins_2": 1.6098158359527588, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -97.0744400024414, "logps_train/policy_1_l": -165.1468963623047, "logps_train/policy_1_w": -158.05191040039062, "logps_train/policy_2_2": -74.63328552246094, "logps_train/policy_2_w": -195.83932495117188, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.6839622259140015, "rewards_train/1-l": -3.005899667739868, "rewards_train/1-w": 3.486996650695801, "rewards_train/2-2": 2.366358757019043, "rewards_train/2-w": 1.8879413604736328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.492896318435669, "rewards_train/margins_1": 1.8030344247817993, "rewards_train/margins_2": 0.47841739654541016, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -69.24353790283203, "logps_train/policy_1_l": -102.2890625, "logps_train/policy_1_w": -54.91749954223633, "logps_train/policy_2_2": -56.90400695800781, "logps_train/policy_2_w": -76.72364807128906, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -77.5, "logps_train/ref_2_2": -76.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.3967397212982178, "rewards_train/1-l": -1.553515911102295, "rewards_train/1-w": 2.262937545776367, "rewards_train/2-2": 1.944756031036377, "rewards_train/2-w": 1.5174791812896729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.816453456878662, "rewards_train/margins_1": 0.8661978244781494, "rewards_train/margins_2": 0.4272768497467041, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -142.3112335205078, "logps_train/policy_1_l": -181.2059326171875, "logps_train/policy_1_w": -123.13719177246094, "logps_train/policy_2_2": -117.0001449584961, "logps_train/policy_2_w": -167.64892578125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.9407520294189453, "rewards_train/1-l": -3.2159056663513184, "rewards_train/1-w": 3.4222185611724854, "rewards_train/2-2": 2.924985647201538, "rewards_train/2-w": 1.3976078033447266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.638124227523804, "rewards_train/margins_1": 1.48146653175354, "rewards_train/margins_2": 1.5273778438568115, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -202.95281982421875, "logps_train/policy_1_l": -168.13882446289062, "logps_train/policy_1_w": -144.73501586914062, "logps_train/policy_2_2": -159.38150024414062, "logps_train/policy_2_w": -205.9555206298828, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.9640936851501465, "rewards_train/1-l": -1.9863922595977783, "rewards_train/1-w": 3.682748317718506, "rewards_train/2-2": 3.930601119995117, "rewards_train/2-w": 1.3013230562210083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.669140577316284, "rewards_train/margins_1": 1.7186546325683594, "rewards_train/margins_2": 2.629278063774109, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -183.89549255371094, "logps_train/policy_1_l": -224.0439910888672, "logps_train/policy_1_w": -152.75535583496094, "logps_train/policy_2_2": -153.80426025390625, "logps_train/policy_2_w": -183.28009033203125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.3627939224243164, "rewards_train/1-l": -3.0172171592712402, "rewards_train/1-w": 4.504152774810791, "rewards_train/2-2": 3.3832461833953857, "rewards_train/2-w": 3.2616403102874756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.521369934082031, "rewards_train/margins_1": 2.1413588523864746, "rewards_train/margins_2": 0.12160587310791016, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -205.4381866455078, "logps_train/policy_1_l": -199.80014038085938, "logps_train/policy_1_w": -144.0797882080078, "logps_train/policy_2_2": -167.33599853515625, "logps_train/policy_2_w": -177.40411376953125, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.322197437286377, "rewards_train/1-l": -2.2488627433776855, "rewards_train/1-w": 2.926396131515503, "rewards_train/2-2": 2.73173189163208, "rewards_train/2-w": 1.4603697061538696, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1752588748931885, "rewards_train/margins_1": 1.604198694229126, "rewards_train/margins_2": 1.2713621854782104, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -92.49073791503906, "logps_train/policy_1_l": -133.42018127441406, "logps_train/policy_1_w": -69.57347106933594, "logps_train/policy_2_2": -73.31819152832031, "logps_train/policy_2_w": -99.5935287475586, "logps_train/ref_1_2": -107.5, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 1.4845194816589355, "rewards_train/1-l": -1.9138922691345215, "rewards_train/1-w": 2.6199963092803955, "rewards_train/2-2": 2.213102340698242, "rewards_train/2-w": 1.316037893295288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.533888578414917, "rewards_train/margins_1": 1.13547682762146, "rewards_train/margins_2": 0.8970644474029541, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -141.8756866455078, "logps_train/policy_1_l": -147.0596923828125, "logps_train/policy_1_w": -141.38565063476562, "logps_train/policy_2_2": -106.81957244873047, "logps_train/policy_2_w": -180.47537231445312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.797587275505066, "rewards_train/1-l": -2.122765302658081, "rewards_train/1-w": 2.190732479095459, "rewards_train/2-2": 3.2571051120758057, "rewards_train/2-w": 0.4430875778198242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.31349778175354, "rewards_train/margins_1": 0.39314520359039307, "rewards_train/margins_2": 2.8140175342559814, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -87.2750015258789, "logps_train/policy_1_l": -100.28729248046875, "logps_train/policy_1_w": -47.10285949707031, "logps_train/policy_2_2": -66.9874038696289, "logps_train/policy_2_w": -66.90980529785156, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -78.5, "rewards_train/1-2": 1.163906455039978, "rewards_train/1-l": -1.81437349319458, "rewards_train/1-w": 2.170964002609253, "rewards_train/2-2": 2.034853458404541, "rewards_train/2-w": 1.1598008871078491, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.985337495803833, "rewards_train/margins_1": 1.007057547569275, "rewards_train/margins_2": 0.8750525712966919, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -93.07646179199219, "logps_train/policy_1_l": -108.83808898925781, "logps_train/policy_1_w": -85.44017028808594, "logps_train/policy_2_2": -70.82750701904297, "logps_train/policy_2_w": -109.67109680175781, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 1.0861035585403442, "rewards_train/1-l": -0.9619336128234863, "rewards_train/1-w": 2.5337183475494385, "rewards_train/2-2": 2.1547493934631348, "rewards_train/2-w": 1.3914841413497925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.495651960372925, "rewards_train/margins_1": 1.4476147890090942, "rewards_train/margins_2": 0.7632652521133423, "step": 621 }, { "epoch": 1.86, "learning_rate": 6.994271479897313e-08, "loss": 0.4515, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -207.93753051757812, "logps_train/policy_1_l": -185.75228881835938, "logps_train/policy_1_w": -129.40863037109375, "logps_train/policy_2_2": -160.22653198242188, "logps_train/policy_2_w": -167.38665771484375, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 2.2093729972839355, "rewards_train/1-l": -3.0443692207336426, "rewards_train/1-w": 4.154450416564941, "rewards_train/2-2": 4.420315742492676, "rewards_train/2-w": 2.680084228515625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.198819637298584, "rewards_train/margins_1": 1.9450774192810059, "rewards_train/margins_2": 1.7402315139770508, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -194.5516815185547, "logps_train/policy_1_l": -206.45367431640625, "logps_train/policy_1_w": -154.30836486816406, "logps_train/policy_2_2": -158.08212280273438, "logps_train/policy_2_w": -203.1627197265625, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": 1.8760817050933838, "rewards_train/1-l": -2.6422414779663086, "rewards_train/1-w": 4.0183820724487305, "rewards_train/2-2": 3.6152241230010986, "rewards_train/2-w": 1.5954465866088867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.660623550415039, "rewards_train/margins_1": 2.1423003673553467, "rewards_train/margins_2": 2.019777536392212, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -201.84120178222656, "logps_train/policy_1_l": -206.7667694091797, "logps_train/policy_1_w": -187.6679229736328, "logps_train/policy_2_2": -152.19107055664062, "logps_train/policy_2_w": -259.0688171386719, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -276.0, "rewards_train/1-2": 2.1221296787261963, "rewards_train/1-l": -2.961052894592285, "rewards_train/1-w": 4.130082130432129, "rewards_train/2-2": 3.7246429920196533, "rewards_train/2-w": 1.7868690490722656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.091135025024414, "rewards_train/margins_1": 2.0079524517059326, "rewards_train/margins_2": 1.9377739429473877, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -116.24346923828125, "logps_train/policy_1_l": -151.78475952148438, "logps_train/policy_1_w": -83.77052307128906, "logps_train/policy_2_2": -83.5941162109375, "logps_train/policy_2_w": -116.33236694335938, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.8350276947021484, "rewards_train/1-l": -3.298886299133301, "rewards_train/1-w": 3.1916003227233887, "rewards_train/2-2": 2.9491825103759766, "rewards_train/2-w": 2.280435085296631, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.4904866218566895, "rewards_train/margins_1": 1.3565726280212402, "rewards_train/margins_2": 0.6687474250793457, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -158.09078979492188, "logps_train/policy_1_l": -182.5872802734375, "logps_train/policy_1_w": -159.6419677734375, "logps_train/policy_2_2": -115.455322265625, "logps_train/policy_2_w": -203.11402893066406, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 1.5752959251403809, "rewards_train/1-l": -3.308727741241455, "rewards_train/1-w": 3.7350218296051025, "rewards_train/2-2": 3.2333734035491943, "rewards_train/2-w": 1.6960184574127197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.043749570846558, "rewards_train/margins_1": 2.1597259044647217, "rewards_train/margins_2": 1.5373549461364746, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -120.07315826416016, "logps_train/policy_1_l": -102.44830322265625, "logps_train/policy_1_w": -92.66011810302734, "logps_train/policy_2_2": -80.790283203125, "logps_train/policy_2_w": -124.41780090332031, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": 1.1676844358444214, "rewards_train/1-l": -1.9223692417144775, "rewards_train/1-w": 2.758988380432129, "rewards_train/2-2": 2.8057374954223633, "rewards_train/2-w": 1.4488451480865479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.6813576221466064, "rewards_train/margins_1": 1.5913039445877075, "rewards_train/margins_2": 1.3568923473358154, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -167.32586669921875, "logps_train/policy_1_l": -139.91378784179688, "logps_train/policy_1_w": -135.38998413085938, "logps_train/policy_2_2": -126.93073272705078, "logps_train/policy_2_w": -191.89157104492188, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.731476068496704, "rewards_train/1-l": -1.7769262790679932, "rewards_train/1-w": 4.160609722137451, "rewards_train/2-2": 3.354583263397217, "rewards_train/2-w": 2.033108711242676, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.937536001205444, "rewards_train/margins_1": 2.429133653640747, "rewards_train/margins_2": 1.321474552154541, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -112.51300048828125, "logps_train/policy_1_l": -149.02650451660156, "logps_train/policy_1_w": -86.55149841308594, "logps_train/policy_2_2": -88.59121704101562, "logps_train/policy_2_w": -117.82853698730469, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": 1.3358089923858643, "rewards_train/1-l": -2.8907365798950195, "rewards_train/1-w": 2.9479751586914062, "rewards_train/2-2": 2.2877535820007324, "rewards_train/2-w": 1.973397135734558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.838711738586426, "rewards_train/margins_1": 1.612166166305542, "rewards_train/margins_2": 0.3143564462661743, "step": 622 }, { "epoch": 1.87, "logps_train/policy_1_2": -193.06871032714844, "logps_train/policy_1_l": -227.16476440429688, "logps_train/policy_1_w": -129.91400146484375, "logps_train/policy_2_2": -144.91519165039062, "logps_train/policy_2_w": -195.4717559814453, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.6915667057037354, "rewards_train/1-l": -2.257101058959961, "rewards_train/1-w": 3.49922513961792, "rewards_train/2-2": 3.5477380752563477, "rewards_train/2-w": 1.5309503078460693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.756326198577881, "rewards_train/margins_1": 1.8076584339141846, "rewards_train/margins_2": 2.0167877674102783, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -107.1655044555664, "logps_train/policy_1_l": -150.1861114501953, "logps_train/policy_1_w": -94.77344512939453, "logps_train/policy_2_2": -81.77655029296875, "logps_train/policy_2_w": -135.35269165039062, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.6314964294433594, "rewards_train/1-l": -2.139899730682373, "rewards_train/1-w": 2.558593511581421, "rewards_train/2-2": 2.6694159507751465, "rewards_train/2-w": 1.278793454170227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.698493242263794, "rewards_train/margins_1": 0.9270970821380615, "rewards_train/margins_2": 1.3906224966049194, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -119.4670181274414, "logps_train/policy_1_l": -115.1492691040039, "logps_train/policy_1_w": -127.35138702392578, "logps_train/policy_2_2": -85.12471008300781, "logps_train/policy_2_w": -159.03628540039062, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.7720483541488647, "rewards_train/1-l": -1.2294772863388062, "rewards_train/1-w": 3.1828298568725586, "rewards_train/2-2": 2.6828417778015137, "rewards_train/2-w": 1.8776211738586426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.412307143211365, "rewards_train/margins_1": 1.4107815027236938, "rewards_train/margins_2": 0.8052206039428711, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -92.06291961669922, "logps_train/policy_1_l": -98.38961791992188, "logps_train/policy_1_w": -54.68566131591797, "logps_train/policy_2_2": -66.54680633544922, "logps_train/policy_2_w": -77.49278259277344, "logps_train/ref_1_2": -105.0, "logps_train/ref_1_l": -79.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -86.5, "rewards_train/1-2": 1.3155828714370728, "rewards_train/1-l": -1.9303687810897827, "rewards_train/1-w": 1.8911992311477661, "rewards_train/2-2": 2.484381914138794, "rewards_train/2-w": 0.9272841215133667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.821568012237549, "rewards_train/margins_1": 0.5756163597106934, "rewards_train/margins_2": 1.5570977926254272, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -105.93484497070312, "logps_train/policy_1_l": -173.80064392089844, "logps_train/policy_1_w": -106.4163818359375, "logps_train/policy_2_2": -79.14370727539062, "logps_train/policy_2_w": -151.20993041992188, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5690158605575562, "rewards_train/1-l": -2.0441269874572754, "rewards_train/1-w": 2.895081043243408, "rewards_train/2-2": 2.588754415512085, "rewards_train/2-w": 1.6329138278961182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.939208030700684, "rewards_train/margins_1": 1.326065182685852, "rewards_train/margins_2": 0.9558405876159668, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -146.14706420898438, "logps_train/policy_1_l": -171.83543395996094, "logps_train/policy_1_w": -91.51255798339844, "logps_train/policy_2_2": -114.40373992919922, "logps_train/policy_2_w": -139.10760498046875, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.1884195804595947, "rewards_train/1-l": -2.3820791244506836, "rewards_train/1-w": 2.865149974822998, "rewards_train/2-2": 2.526813507080078, "rewards_train/2-w": 1.1478341817855835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.247229099273682, "rewards_train/margins_1": 1.6767303943634033, "rewards_train/margins_2": 1.3789793252944946, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -123.42405700683594, "logps_train/policy_1_l": -136.5856170654297, "logps_train/policy_1_w": -84.13603973388672, "logps_train/policy_2_2": -91.82036590576172, "logps_train/policy_2_w": -112.2872085571289, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": 0.9294694066047668, "rewards_train/1-l": -2.045426845550537, "rewards_train/1-w": 2.614521026611328, "rewards_train/2-2": 2.2468695640563965, "rewards_train/2-w": 1.311513900756836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.659947872161865, "rewards_train/margins_1": 1.6850516200065613, "rewards_train/margins_2": 0.9353556632995605, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -149.48483276367188, "logps_train/policy_1_l": -176.6393280029297, "logps_train/policy_1_w": -131.75289916992188, "logps_train/policy_2_2": -113.17909240722656, "logps_train/policy_2_w": -177.02972412109375, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.7062042951583862, "rewards_train/1-l": -2.343620777130127, "rewards_train/1-w": 4.496584892272949, "rewards_train/2-2": 3.2703723907470703, "rewards_train/2-w": 2.8532767295837402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 6.840205669403076, "rewards_train/margins_1": 2.790380597114563, "rewards_train/margins_2": 0.4170956611633301, "step": 623 }, { "epoch": 1.87, "learning_rate": 6.426016523400553e-08, "loss": 0.3221, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -171.03201293945312, "logps_train/policy_1_l": -222.2100372314453, "logps_train/policy_1_w": -175.48849487304688, "logps_train/policy_2_2": -126.60128021240234, "logps_train/policy_2_w": -236.89369201660156, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -219.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 1.856954574584961, "rewards_train/1-l": -2.5530357360839844, "rewards_train/1-w": 4.348805904388428, "rewards_train/2-2": 3.4000282287597656, "rewards_train/2-w": 1.6778185367584229, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.901841640472412, "rewards_train/margins_1": 2.491851329803467, "rewards_train/margins_2": 1.7222096920013428, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -119.89404296875, "logps_train/policy_1_l": -117.209228515625, "logps_train/policy_1_w": -135.16773986816406, "logps_train/policy_2_2": -100.86581420898438, "logps_train/policy_2_w": -175.87197875976562, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.8234869241714478, "rewards_train/1-l": -1.6056891679763794, "rewards_train/1-w": 3.363694667816162, "rewards_train/2-2": 2.6909568309783936, "rewards_train/2-w": 1.39327073097229, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9693838357925415, "rewards_train/margins_1": 1.5402077436447144, "rewards_train/margins_2": 1.2976861000061035, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -230.11898803710938, "logps_train/policy_1_l": -208.74594116210938, "logps_train/policy_1_w": -203.74966430664062, "logps_train/policy_2_2": -178.44454956054688, "logps_train/policy_2_w": -253.3460693359375, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -238.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": 1.6131017208099365, "rewards_train/1-l": -2.5728378295898438, "rewards_train/1-w": 3.498471260070801, "rewards_train/2-2": 4.024294853210449, "rewards_train/2-w": 1.00133216381073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.0713090896606445, "rewards_train/margins_1": 1.8853695392608643, "rewards_train/margins_2": 3.0229626893997192, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -141.10816955566406, "logps_train/policy_1_l": -190.9071807861328, "logps_train/policy_1_w": -113.98912048339844, "logps_train/policy_2_2": -116.69174194335938, "logps_train/policy_2_w": -152.42572021484375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.7290270328521729, "rewards_train/1-l": -2.4157180786132812, "rewards_train/1-w": 3.1690564155578613, "rewards_train/2-2": 2.597231864929199, "rewards_train/2-w": 1.8214902877807617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.584774494171143, "rewards_train/margins_1": 1.4400293827056885, "rewards_train/margins_2": 0.7757415771484375, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -212.3224639892578, "logps_train/policy_1_l": -213.42254638671875, "logps_train/policy_1_w": -164.15582275390625, "logps_train/policy_2_2": -157.9630126953125, "logps_train/policy_2_w": -226.63739013671875, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": 1.3052544593811035, "rewards_train/1-l": -2.7121288776397705, "rewards_train/1-w": 4.210198879241943, "rewards_train/2-2": 3.684948205947876, "rewards_train/2-w": 1.3807923793792725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.922327756881714, "rewards_train/margins_1": 2.90494441986084, "rewards_train/margins_2": 2.3041558265686035, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -131.1529998779297, "logps_train/policy_1_l": -119.52171325683594, "logps_train/policy_1_w": -68.36361694335938, "logps_train/policy_2_2": -96.05796813964844, "logps_train/policy_2_w": -101.39639282226562, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": 1.0096995830535889, "rewards_train/1-l": -2.266037940979004, "rewards_train/1-w": 2.6718406677246094, "rewards_train/2-2": 2.4567036628723145, "rewards_train/2-w": 1.104501485824585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.937878608703613, "rewards_train/margins_1": 1.6621410846710205, "rewards_train/margins_2": 1.3522021770477295, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -225.41085815429688, "logps_train/policy_1_l": -207.05636596679688, "logps_train/policy_1_w": -147.42469787597656, "logps_train/policy_2_2": -176.51028442382812, "logps_train/policy_2_w": -191.93881225585938, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.152663230895996, "rewards_train/1-l": -2.1650123596191406, "rewards_train/1-w": 3.8473734855651855, "rewards_train/2-2": 4.642722129821777, "rewards_train/2-w": 2.337369441986084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.012385845184326, "rewards_train/margins_1": 1.6947102546691895, "rewards_train/margins_2": 2.3053526878356934, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -111.06145477294922, "logps_train/policy_1_l": -118.5748291015625, "logps_train/policy_1_w": -116.46598052978516, "logps_train/policy_2_2": -95.21807861328125, "logps_train/policy_2_w": -141.26812744140625, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.9657291173934937, "rewards_train/1-l": -1.3231079578399658, "rewards_train/1-w": 2.7127766609191895, "rewards_train/2-2": 2.4891304969787598, "rewards_train/2-w": 1.7091246843338013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.035884618759155, "rewards_train/margins_1": 0.7470475435256958, "rewards_train/margins_2": 0.7800058126449585, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -152.2757568359375, "logps_train/policy_1_l": -139.3006134033203, "logps_train/policy_1_w": -129.08958435058594, "logps_train/policy_2_2": -114.61492919921875, "logps_train/policy_2_w": -171.1548614501953, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1630489826202393, "rewards_train/1-l": -1.909748911857605, "rewards_train/1-w": 2.8668227195739746, "rewards_train/2-2": 3.118194103240967, "rewards_train/2-w": 0.9056075811386108, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.77657163143158, "rewards_train/margins_1": 1.7037737369537354, "rewards_train/margins_2": 2.212586522102356, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -114.66439056396484, "logps_train/policy_1_l": -171.46725463867188, "logps_train/policy_1_w": -120.80752563476562, "logps_train/policy_2_2": -98.82925415039062, "logps_train/policy_2_w": -147.56236267089844, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": 1.8257479667663574, "rewards_train/1-l": -1.7779757976531982, "rewards_train/1-w": 2.5912203788757324, "rewards_train/2-2": 2.4553558826446533, "rewards_train/2-w": 1.617884635925293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.369196176528931, "rewards_train/margins_1": 0.765472412109375, "rewards_train/margins_2": 0.8374712467193604, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -100.09989929199219, "logps_train/policy_1_l": -123.45494079589844, "logps_train/policy_1_w": -88.85795593261719, "logps_train/policy_2_2": -64.71232604980469, "logps_train/policy_2_w": -132.57162475585938, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 0.8228227496147156, "rewards_train/1-l": -2.2556498050689697, "rewards_train/1-w": 2.8212361335754395, "rewards_train/2-2": 1.9912675619125366, "rewards_train/2-w": 1.0881506204605103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.076885938644409, "rewards_train/margins_1": 1.9984133839607239, "rewards_train/margins_2": 0.9031169414520264, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -110.2264633178711, "logps_train/policy_1_l": -141.9456787109375, "logps_train/policy_1_w": -84.05378723144531, "logps_train/policy_2_2": -83.92535400390625, "logps_train/policy_2_w": -110.5714111328125, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 1.1070408821105957, "rewards_train/1-l": -1.4268420934677124, "rewards_train/1-w": 2.1598563194274902, "rewards_train/2-2": 2.145843982696533, "rewards_train/2-w": 0.9823123812675476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5866984128952026, "rewards_train/margins_1": 1.0528154373168945, "rewards_train/margins_2": 1.1635316014289856, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -152.80349731445312, "logps_train/policy_1_l": -132.96755981445312, "logps_train/policy_1_w": -155.84854125976562, "logps_train/policy_2_2": -119.89020538330078, "logps_train/policy_2_w": -194.63890075683594, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.894650936126709, "rewards_train/1-l": -1.664724588394165, "rewards_train/1-w": 3.383897542953491, "rewards_train/2-2": 3.2047290802001953, "rewards_train/2-w": 1.7564233541488647, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.048622131347656, "rewards_train/margins_1": 1.4892466068267822, "rewards_train/margins_2": 1.4483057260513306, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -175.64593505859375, "logps_train/policy_1_l": -163.32144165039062, "logps_train/policy_1_w": -120.82622528076172, "logps_train/policy_2_2": -141.796875, "logps_train/policy_2_w": -151.450439453125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.6807194948196411, "rewards_train/1-l": -1.9118316173553467, "rewards_train/1-w": 2.846088409423828, "rewards_train/2-2": 3.0265631675720215, "rewards_train/2-w": 1.5600346326828003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.757920026779175, "rewards_train/margins_1": 1.165368914604187, "rewards_train/margins_2": 1.4665285348892212, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -187.83383178710938, "logps_train/policy_1_l": -184.60031127929688, "logps_train/policy_1_w": -154.39369201660156, "logps_train/policy_2_2": -155.12733459472656, "logps_train/policy_2_w": -199.29226684570312, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 2.100991725921631, "rewards_train/1-l": -2.05456280708313, "rewards_train/1-w": 3.9418814182281494, "rewards_train/2-2": 3.6528921127319336, "rewards_train/2-w": 2.3113982677459717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.996444225311279, "rewards_train/margins_1": 1.8408896923065186, "rewards_train/margins_2": 1.341493844985962, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -89.42396545410156, "logps_train/policy_1_l": -90.23749542236328, "logps_train/policy_1_w": -86.69683074951172, "logps_train/policy_2_2": -70.01575469970703, "logps_train/policy_2_w": -112.17688751220703, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 0.7275252938270569, "rewards_train/1-l": -1.3546092510223389, "rewards_train/1-w": 2.136566638946533, "rewards_train/2-2": 1.3249868154525757, "rewards_train/2-w": 1.1166865825653076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 3.491175889968872, "rewards_train/margins_1": 1.4090413451194763, "rewards_train/margins_2": 0.20830023288726807, "step": 625 }, { "epoch": 1.87, "learning_rate": 5.8815339582966316e-08, "loss": 0.3867, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -195.64813232421875, "logps_train/policy_1_l": -169.18182373046875, "logps_train/policy_1_w": -142.7822265625, "logps_train/policy_2_2": -142.10037231445312, "logps_train/policy_2_w": -190.97357177734375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.7633124589920044, "rewards_train/1-l": -1.7955266237258911, "rewards_train/1-w": 3.7131831645965576, "rewards_train/2-2": 4.027463912963867, "rewards_train/2-w": 1.9338936805725098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.508709788322449, "rewards_train/margins_1": 1.9498707056045532, "rewards_train/margins_2": 2.0935702323913574, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -151.84207153320312, "logps_train/policy_1_l": -170.83192443847656, "logps_train/policy_1_w": -121.13566589355469, "logps_train/policy_2_2": -98.8281021118164, "logps_train/policy_2_w": -183.9377899169922, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.4720441102981567, "rewards_train/1-l": -2.462099075317383, "rewards_train/1-w": 3.3520584106445312, "rewards_train/2-2": 2.976564884185791, "rewards_train/2-w": 1.5812206268310547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.814157485961914, "rewards_train/margins_1": 1.8800143003463745, "rewards_train/margins_2": 1.3953442573547363, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -142.29476928710938, "logps_train/policy_1_l": -159.92697143554688, "logps_train/policy_1_w": -106.54434204101562, "logps_train/policy_2_2": -118.59001159667969, "logps_train/policy_2_w": -127.66490173339844, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.193960428237915, "rewards_train/1-l": -2.575510025024414, "rewards_train/1-w": 2.9213473796844482, "rewards_train/2-2": 2.2796711921691895, "rewards_train/2-w": 2.063978433609009, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.496857404708862, "rewards_train/margins_1": 1.7273869514465332, "rewards_train/margins_2": 0.21569275856018066, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -202.3096923828125, "logps_train/policy_1_l": -202.2879638671875, "logps_train/policy_1_w": -125.88681030273438, "logps_train/policy_2_2": -159.73851013183594, "logps_train/policy_2_w": -168.1261444091797, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.9198113679885864, "rewards_train/1-l": -1.8298704624176025, "rewards_train/1-w": 3.2593653202056885, "rewards_train/2-2": 3.3488049507141113, "rewards_train/2-w": 1.9217610359191895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.089235782623291, "rewards_train/margins_1": 1.339553952217102, "rewards_train/margins_2": 1.4270439147949219, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -135.80157470703125, "logps_train/policy_1_l": -141.370849609375, "logps_train/policy_1_w": -93.86861419677734, "logps_train/policy_2_2": -117.38765716552734, "logps_train/policy_2_w": -111.36085510253906, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.8089046478271484, "rewards_train/1-l": -1.710350751876831, "rewards_train/1-w": 2.5140910148620605, "rewards_train/2-2": 2.604984760284424, "rewards_train/2-w": 1.8639142513275146, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.224441766738892, "rewards_train/margins_1": 0.7051863670349121, "rewards_train/margins_2": 0.7410705089569092, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -118.6721420288086, "logps_train/policy_1_l": -112.6099624633789, "logps_train/policy_1_w": -141.94122314453125, "logps_train/policy_2_2": -96.41603088378906, "logps_train/policy_2_w": -174.98861694335938, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 2.0312232971191406, "rewards_train/1-l": -1.4555763006210327, "rewards_train/1-w": 3.826190948486328, "rewards_train/2-2": 2.7237048149108887, "rewards_train/2-w": 2.1745765209198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.281767249107361, "rewards_train/margins_1": 1.7949676513671875, "rewards_train/margins_2": 0.5491282939910889, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -100.95237731933594, "logps_train/policy_1_l": -115.41665649414062, "logps_train/policy_1_w": -79.06574249267578, "logps_train/policy_2_2": -86.47685241699219, "logps_train/policy_2_w": -93.67141723632812, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -110.5, "rewards_train/1-2": 1.1871836185455322, "rewards_train/1-l": -1.4772117137908936, "rewards_train/1-w": 2.4102225303649902, "rewards_train/2-2": 1.7921593189239502, "rewards_train/2-w": 1.6814907789230347, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.887434244155884, "rewards_train/margins_1": 1.223038911819458, "rewards_train/margins_2": 0.11066854000091553, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -142.43472290039062, "logps_train/policy_1_l": -188.46102905273438, "logps_train/policy_1_w": -174.52940368652344, "logps_train/policy_2_2": -108.22776794433594, "logps_train/policy_2_w": -232.58737182617188, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -246.0, "rewards_train/1-2": 1.6518394947052002, "rewards_train/1-l": -1.9343833923339844, "rewards_train/1-w": 3.5650291442871094, "rewards_train/2-2": 3.0194108486175537, "rewards_train/2-w": 1.4553253650665283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.499412536621094, "rewards_train/margins_1": 1.9131896495819092, "rewards_train/margins_2": 1.5640854835510254, "step": 626 }, { "epoch": 1.88, "logps_train/policy_1_2": -191.7202911376953, "logps_train/policy_1_l": -213.32296752929688, "logps_train/policy_1_w": -140.44976806640625, "logps_train/policy_2_2": -140.3812713623047, "logps_train/policy_2_w": -203.59231567382812, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4935961961746216, "rewards_train/1-l": -2.2822961807250977, "rewards_train/1-w": 4.130023956298828, "rewards_train/2-2": 3.4071855545043945, "rewards_train/2-w": 1.5188930034637451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.412320137023926, "rewards_train/margins_1": 2.6364277601242065, "rewards_train/margins_2": 1.8882925510406494, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -190.61805725097656, "logps_train/policy_1_l": -298.2744140625, "logps_train/policy_1_w": -181.9031982421875, "logps_train/policy_2_2": -162.4951629638672, "logps_train/policy_2_w": -218.974853515625, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -252.0, "logps_train/ref_1_w": -221.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 1.839756965637207, "rewards_train/1-l": -4.652441024780273, "rewards_train/1-w": 3.900304079055786, "rewards_train/2-2": 3.031733989715576, "rewards_train/2-w": 2.2415778636932373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 8.55274510383606, "rewards_train/margins_1": 2.060547113418579, "rewards_train/margins_2": 0.7901561260223389, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -147.11788940429688, "logps_train/policy_1_l": -158.20205688476562, "logps_train/policy_1_w": -132.86734008789062, "logps_train/policy_2_2": -92.40208435058594, "logps_train/policy_2_w": -181.04296875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.8975855112075806, "rewards_train/1-l": -1.723623514175415, "rewards_train/1-w": 3.6163907051086426, "rewards_train/2-2": 3.3910417556762695, "rewards_train/2-w": 2.123828411102295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.340014219284058, "rewards_train/margins_1": 1.718805193901062, "rewards_train/margins_2": 1.2672133445739746, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -185.0443115234375, "logps_train/policy_1_l": -113.5760726928711, "logps_train/policy_1_w": -154.29563903808594, "logps_train/policy_2_2": -136.3780059814453, "logps_train/policy_2_w": -195.83456420898438, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 1.0877554416656494, "rewards_train/1-l": -0.9624900817871094, "rewards_train/1-w": 3.339186191558838, "rewards_train/2-2": 3.137199640274048, "rewards_train/2-w": 1.56498122215271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.301676273345947, "rewards_train/margins_1": 2.2514307498931885, "rewards_train/margins_2": 1.572218418121338, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -204.7259521484375, "logps_train/policy_1_l": -155.57398986816406, "logps_train/policy_1_w": -106.49183654785156, "logps_train/policy_2_2": -158.10635375976562, "logps_train/policy_2_w": -135.97186279296875, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.4746711254119873, "rewards_train/1-l": -1.7413833141326904, "rewards_train/1-w": 2.803941488265991, "rewards_train/2-2": 3.349911689758301, "rewards_train/2-w": 1.8309382200241089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.545324802398682, "rewards_train/margins_1": 1.329270362854004, "rewards_train/margins_2": 1.518973469734192, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -305.2444152832031, "logps_train/policy_1_l": -191.5498046875, "logps_train/policy_1_w": -136.9625701904297, "logps_train/policy_2_2": -256.6223449707031, "logps_train/policy_2_w": -171.04196166992188, "logps_train/ref_1_2": -324.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -292.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.8005568981170654, "rewards_train/1-l": -1.862304449081421, "rewards_train/1-w": 3.392805814743042, "rewards_train/2-2": 3.548704147338867, "rewards_train/2-w": 1.9801790714263916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.255110263824463, "rewards_train/margins_1": 1.5922489166259766, "rewards_train/margins_2": 1.5685250759124756, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -174.62403869628906, "logps_train/policy_1_l": -201.0350799560547, "logps_train/policy_1_w": -135.4220428466797, "logps_train/policy_2_2": -129.9788360595703, "logps_train/policy_2_w": -176.09230041503906, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 1.6469709873199463, "rewards_train/1-l": -1.7058520317077637, "rewards_train/1-w": 3.579085350036621, "rewards_train/2-2": 3.0833659172058105, "rewards_train/2-w": 1.9798322916030884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.284937381744385, "rewards_train/margins_1": 1.9321143627166748, "rewards_train/margins_2": 1.1035336256027222, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -170.684326171875, "logps_train/policy_1_l": -148.87078857421875, "logps_train/policy_1_w": -132.05300903320312, "logps_train/policy_2_2": -138.92379760742188, "logps_train/policy_2_w": -167.86041259765625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.870628833770752, "rewards_train/1-l": -1.2464535236358643, "rewards_train/1-w": 3.3556368350982666, "rewards_train/2-2": 2.8982455730438232, "rewards_train/2-w": 1.96396005153656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.602090358734131, "rewards_train/margins_1": 1.4850080013275146, "rewards_train/margins_2": 0.9342855215072632, "step": 627 }, { "epoch": 1.88, "learning_rate": 5.360876925123992e-08, "loss": 0.3399, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -214.29287719726562, "logps_train/policy_1_l": -178.63912963867188, "logps_train/policy_1_w": -96.65797424316406, "logps_train/policy_2_2": -159.241455078125, "logps_train/policy_2_w": -124.4839096069336, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.9707131385803223, "rewards_train/1-l": -2.23266339302063, "rewards_train/1-w": 2.6160378456115723, "rewards_train/2-2": 3.4758548736572266, "rewards_train/2-w": 1.7156713008880615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.848701238632202, "rewards_train/margins_1": 1.64532470703125, "rewards_train/margins_2": 1.760183572769165, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -195.6172637939453, "logps_train/policy_1_l": -177.42919921875, "logps_train/policy_1_w": -126.45535278320312, "logps_train/policy_2_2": -150.9014892578125, "logps_train/policy_2_w": -165.7262420654297, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.1726489067077637, "rewards_train/1-l": -3.272998809814453, "rewards_train/1-w": 3.885714054107666, "rewards_train/2-2": 4.297350883483887, "rewards_train/2-w": 2.4062819480895996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.158712863922119, "rewards_train/margins_1": 1.7130651473999023, "rewards_train/margins_2": 1.891068935394287, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -148.15203857421875, "logps_train/policy_1_l": -197.28152465820312, "logps_train/policy_1_w": -116.93461608886719, "logps_train/policy_2_2": -124.0049057006836, "logps_train/policy_2_w": -152.20729064941406, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 2.2270333766937256, "rewards_train/1-l": -1.5799086093902588, "rewards_train/1-w": 3.453462600708008, "rewards_train/2-2": 3.0387673377990723, "rewards_train/2-w": 2.3185276985168457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.033371210098267, "rewards_train/margins_1": 1.2264292240142822, "rewards_train/margins_2": 0.7202396392822266, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -130.6459503173828, "logps_train/policy_1_l": -93.94896697998047, "logps_train/policy_1_w": -97.042236328125, "logps_train/policy_2_2": -89.48113250732422, "logps_train/policy_2_w": -135.9034423828125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.133061170578003, "rewards_train/1-l": -0.9544672966003418, "rewards_train/1-w": 3.306713342666626, "rewards_train/2-2": 2.8018863201141357, "rewards_train/2-w": 1.5346566438674927, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.261180639266968, "rewards_train/margins_1": 2.173652172088623, "rewards_train/margins_2": 1.267229676246643, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -143.63328552246094, "logps_train/policy_1_l": -120.90435028076172, "logps_train/policy_1_w": -101.21383666992188, "logps_train/policy_2_2": -104.66787719726562, "logps_train/policy_2_w": -141.59718322753906, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.7491717338562012, "rewards_train/1-l": -1.4285210371017456, "rewards_train/1-w": 3.10205340385437, "rewards_train/2-2": 3.185556650161743, "rewards_train/2-w": 1.3402812480926514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.530574440956116, "rewards_train/margins_1": 1.352881669998169, "rewards_train/margins_2": 1.8452754020690918, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -269.7386169433594, "logps_train/policy_1_l": -225.58450317382812, "logps_train/policy_1_w": -113.90054321289062, "logps_train/policy_2_2": -195.34915161132812, "logps_train/policy_2_w": -172.36953735351562, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -238.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.1886374950408936, "rewards_train/1-l": -2.9232921600341797, "rewards_train/1-w": 2.9951016902923584, "rewards_train/2-2": 4.277585983276367, "rewards_train/2-w": 1.325545310974121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.918393850326538, "rewards_train/margins_1": 1.8064641952514648, "rewards_train/margins_2": 2.952040672302246, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -180.6615753173828, "logps_train/policy_1_l": -192.43528747558594, "logps_train/policy_1_w": -108.00887298583984, "logps_train/policy_2_2": -136.5965576171875, "logps_train/policy_2_w": -142.56280517578125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.649467945098877, "rewards_train/1-l": -2.210325241088867, "rewards_train/1-w": 3.004971742630005, "rewards_train/2-2": 3.247375965118408, "rewards_train/2-w": 1.6855154037475586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.215296983718872, "rewards_train/margins_1": 1.355503797531128, "rewards_train/margins_2": 1.5618605613708496, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -151.947021484375, "logps_train/policy_1_l": -111.05404663085938, "logps_train/policy_1_w": -91.57217407226562, "logps_train/policy_2_2": -112.59809875488281, "logps_train/policy_2_w": -116.28498840332031, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.0615472793579102, "rewards_train/1-l": -1.6213231086730957, "rewards_train/1-w": 2.842782497406006, "rewards_train/2-2": 3.0183143615722656, "rewards_train/2-w": 1.5840015411376953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.464105606079102, "rewards_train/margins_1": 1.7812352180480957, "rewards_train/margins_2": 1.4343128204345703, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -111.97694396972656, "logps_train/policy_1_l": -155.64279174804688, "logps_train/policy_1_w": -134.61802673339844, "logps_train/policy_2_2": -82.43191528320312, "logps_train/policy_2_w": -170.8492431640625, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.589806079864502, "rewards_train/1-l": -1.9392781257629395, "rewards_train/1-w": 2.8163230419158936, "rewards_train/2-2": 2.656808376312256, "rewards_train/2-w": 1.257261872291565, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.755601167678833, "rewards_train/margins_1": 1.2265169620513916, "rewards_train/margins_2": 1.399546504020691, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -138.50363159179688, "logps_train/policy_1_l": -224.85484313964844, "logps_train/policy_1_w": -134.10986328125, "logps_train/policy_2_2": -101.05442810058594, "logps_train/policy_2_w": -175.19097900390625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.7433868646621704, "rewards_train/1-l": -3.446617603302002, "rewards_train/1-w": 3.8890132904052734, "rewards_train/2-2": 3.069556713104248, "rewards_train/2-w": 1.7559013366699219, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.335630893707275, "rewards_train/margins_1": 2.145626425743103, "rewards_train/margins_2": 1.3136553764343262, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -249.2776641845703, "logps_train/policy_1_l": -266.830078125, "logps_train/policy_1_w": -175.3861083984375, "logps_train/policy_2_2": -181.35006713867188, "logps_train/policy_2_w": -242.59579467773438, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -240.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": 1.46598482131958, "rewards_train/1-l": -2.757225513458252, "rewards_train/1-w": 4.142639636993408, "rewards_train/2-2": 4.133744239807129, "rewards_train/2-w": 1.5216715335845947, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.89986515045166, "rewards_train/margins_1": 2.676654815673828, "rewards_train/margins_2": 2.612072706222534, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -113.03753662109375, "logps_train/policy_1_l": -136.98532104492188, "logps_train/policy_1_w": -65.49200439453125, "logps_train/policy_2_2": -90.64918518066406, "logps_train/policy_2_w": -87.40745544433594, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.3153866529464722, "rewards_train/1-l": -1.5977509021759033, "rewards_train/1-w": 2.3152530193328857, "rewards_train/2-2": 2.2124247550964355, "rewards_train/2-w": 1.3108177185058594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.913003921508789, "rewards_train/margins_1": 0.9998663663864136, "rewards_train/margins_2": 0.9016070365905762, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -236.1847381591797, "logps_train/policy_1_l": -286.1566467285156, "logps_train/policy_1_w": -169.45132446289062, "logps_train/policy_2_2": -190.594482421875, "logps_train/policy_2_w": -214.93081665039062, "logps_train/ref_1_2": -256.0, "logps_train/ref_1_l": -256.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -224.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.9409005641937256, "rewards_train/1-l": -2.890662670135498, "rewards_train/1-w": 3.461118459701538, "rewards_train/2-2": 3.4155516624450684, "rewards_train/2-w": 1.8944180011749268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.351781129837036, "rewards_train/margins_1": 1.5202178955078125, "rewards_train/margins_2": 1.5211336612701416, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -95.34447479248047, "logps_train/policy_1_l": -80.1872329711914, "logps_train/policy_1_w": -74.02728271484375, "logps_train/policy_2_2": -74.22381591796875, "logps_train/policy_2_w": -96.7013931274414, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.6721928119659424, "rewards_train/1-l": -1.014035701751709, "rewards_train/1-w": 2.7675838470458984, "rewards_train/2-2": 2.5338690280914307, "rewards_train/2-w": 1.711111307144165, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7816195487976074, "rewards_train/margins_1": 1.095391035079956, "rewards_train/margins_2": 0.8227577209472656, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -151.4907684326172, "logps_train/policy_1_l": -176.4889373779297, "logps_train/policy_1_w": -101.25675201416016, "logps_train/policy_2_2": -118.17689514160156, "logps_train/policy_2_w": -131.31350708007812, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 1.2472121715545654, "rewards_train/1-l": -2.669987440109253, "rewards_train/1-w": 2.5335042476654053, "rewards_train/2-2": 2.5627803802490234, "rewards_train/2-w": 1.3612267971038818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.203491687774658, "rewards_train/margins_1": 1.2862920761108398, "rewards_train/margins_2": 1.2015535831451416, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -162.48458862304688, "logps_train/policy_1_l": -210.03712463378906, "logps_train/policy_1_w": -158.4256134033203, "logps_train/policy_2_2": -122.08475494384766, "logps_train/policy_2_w": -216.22817993164062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.5437288284301758, "rewards_train/1-l": -1.409963607788086, "rewards_train/1-w": 4.559000015258789, "rewards_train/2-2": 2.905195951461792, "rewards_train/2-w": 2.1553077697753906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.968963623046875, "rewards_train/margins_1": 3.0152711868286133, "rewards_train/margins_2": 0.7498881816864014, "step": 629 }, { "epoch": 1.89, "learning_rate": 4.864096239091287e-08, "loss": 0.338, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -175.14634704589844, "logps_train/policy_1_l": -186.17889404296875, "logps_train/policy_1_w": -122.45330047607422, "logps_train/policy_2_2": -129.3001708984375, "logps_train/policy_2_w": -154.2174072265625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.6134905815124512, "rewards_train/1-l": -2.188202381134033, "rewards_train/1-w": 3.3202948570251465, "rewards_train/2-2": 2.9996702671051025, "rewards_train/2-w": 1.7626339197158813, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.50849723815918, "rewards_train/margins_1": 1.7068042755126953, "rewards_train/margins_2": 1.2370363473892212, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -167.05198669433594, "logps_train/policy_1_l": -146.46652221679688, "logps_train/policy_1_w": -136.6671600341797, "logps_train/policy_2_2": -125.68941497802734, "logps_train/policy_2_w": -178.9121856689453, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.8151135444641113, "rewards_train/1-l": -1.5893292427062988, "rewards_train/1-w": 3.033285140991211, "rewards_train/2-2": 3.7341840267181396, "rewards_train/2-w": 1.290031909942627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.62261438369751, "rewards_train/margins_1": 1.2181715965270996, "rewards_train/margins_2": 2.4441521167755127, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -96.31156158447266, "logps_train/policy_1_l": -118.02249145507812, "logps_train/policy_1_w": -96.74581909179688, "logps_train/policy_2_2": -71.35853576660156, "logps_train/policy_2_w": -122.98951721191406, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -90.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.7821253538131714, "rewards_train/1-l": -1.7773470878601074, "rewards_train/1-w": 1.9934360980987549, "rewards_train/2-2": 1.8653180599212646, "rewards_train/2-w": 0.5074453353881836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7707831859588623, "rewards_train/margins_1": 1.2113107442855835, "rewards_train/margins_2": 1.357872724533081, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -118.05474090576172, "logps_train/policy_1_l": -89.87181854248047, "logps_train/policy_1_w": -71.95664978027344, "logps_train/policy_2_2": -82.16972351074219, "logps_train/policy_2_w": -90.27490997314453, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": 0.7664003372192383, "rewards_train/1-l": -1.6994872093200684, "rewards_train/1-w": 2.0622458457946777, "rewards_train/2-2": 2.220527172088623, "rewards_train/2-w": 1.2146962881088257, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.761733055114746, "rewards_train/margins_1": 1.2958455085754395, "rewards_train/margins_2": 1.0058308839797974, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -78.42564392089844, "logps_train/policy_1_l": -111.14192199707031, "logps_train/policy_1_w": -39.77338409423828, "logps_train/policy_2_2": -52.57533264160156, "logps_train/policy_2_w": -60.76849365234375, "logps_train/ref_1_2": -83.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -55.0, "logps_train/ref_2_2": -67.5, "logps_train/ref_2_w": -69.0, "rewards_train/1-2": 0.4563612639904022, "rewards_train/1-l": -1.2199535369873047, "rewards_train/1-w": 1.5129934549331665, "rewards_train/2-2": 1.4935410022735596, "rewards_train/2-w": 0.8238341808319092, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.732946991920471, "rewards_train/margins_1": 1.0566321909427643, "rewards_train/margins_2": 0.6697068214416504, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -157.8624267578125, "logps_train/policy_1_l": -175.8909912109375, "logps_train/policy_1_w": -142.04913330078125, "logps_train/policy_2_2": -105.27378845214844, "logps_train/policy_2_w": -206.62921142578125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8102405071258545, "rewards_train/1-l": -1.780700922012329, "rewards_train/1-w": 3.2892274856567383, "rewards_train/2-2": 3.5191054344177246, "rewards_train/2-w": 1.0530942678451538, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.069928407669067, "rewards_train/margins_1": 1.4789869785308838, "rewards_train/margins_2": 2.466011166572571, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -125.35946655273438, "logps_train/policy_1_l": -147.94007873535156, "logps_train/policy_1_w": -111.92811584472656, "logps_train/policy_2_2": -95.767578125, "logps_train/policy_2_w": -140.11599731445312, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.7359288930892944, "rewards_train/1-l": -1.6279923915863037, "rewards_train/1-w": 1.7165634632110596, "rewards_train/2-2": 2.781054973602295, "rewards_train/2-w": 0.682541012763977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3445558547973633, "rewards_train/margins_1": -0.019365429878234863, "rewards_train/margins_2": 2.098513960838318, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -229.475830078125, "logps_train/policy_1_l": -190.36683654785156, "logps_train/policy_1_w": -142.6756134033203, "logps_train/policy_2_2": -184.0167236328125, "logps_train/policy_2_w": -174.821044921875, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -219.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.7524164915084839, "rewards_train/1-l": -1.4159802198410034, "rewards_train/1-w": 2.698064088821411, "rewards_train/2-2": 3.5139522552490234, "rewards_train/2-w": 1.6616456508636475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1140443086624146, "rewards_train/margins_1": 0.9456475973129272, "rewards_train/margins_2": 1.852306604385376, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -172.9324951171875, "logps_train/policy_1_l": -157.6564483642578, "logps_train/policy_1_w": -114.63851928710938, "logps_train/policy_2_2": -139.50416564941406, "logps_train/policy_2_w": -150.29833984375, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 1.5848755836486816, "rewards_train/1-l": -1.7734568119049072, "rewards_train/1-w": 2.924428939819336, "rewards_train/2-2": 3.025364637374878, "rewards_train/2-w": 1.8811030387878418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.697885751724243, "rewards_train/margins_1": 1.3395533561706543, "rewards_train/margins_2": 1.1442615985870361, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -190.7227020263672, "logps_train/policy_1_l": -168.29067993164062, "logps_train/policy_1_w": -160.71092224121094, "logps_train/policy_2_2": -156.2626190185547, "logps_train/policy_2_w": -208.8024444580078, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.417182683944702, "rewards_train/1-l": -0.8935210704803467, "rewards_train/1-w": 4.238283157348633, "rewards_train/2-2": 3.771395206451416, "rewards_train/2-w": 2.4916305541992188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1318042278289795, "rewards_train/margins_1": 1.8211004734039307, "rewards_train/margins_2": 1.2797646522521973, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -99.989501953125, "logps_train/policy_1_l": -102.93861389160156, "logps_train/policy_1_w": -51.89265823364258, "logps_train/policy_2_2": -82.7941665649414, "logps_train/policy_2_w": -67.38424682617188, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 1.1550538539886475, "rewards_train/1-l": -1.594643235206604, "rewards_train/1-w": 2.001359224319458, "rewards_train/2-2": 1.9882590770721436, "rewards_train/2-w": 1.6225132942199707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.596002459526062, "rewards_train/margins_1": 0.8463053703308105, "rewards_train/margins_2": 0.36574578285217285, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -177.04270935058594, "logps_train/policy_1_l": -133.5821990966797, "logps_train/policy_1_w": -117.41075134277344, "logps_train/policy_2_2": -141.62930297851562, "logps_train/policy_2_w": -148.58236694335938, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.756666660308838, "rewards_train/1-l": -1.876969575881958, "rewards_train/1-w": 3.1706435680389404, "rewards_train/2-2": 3.306210517883301, "rewards_train/2-w": 1.7597324848175049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.047613143920898, "rewards_train/margins_1": 1.4139769077301025, "rewards_train/margins_2": 1.546478033065796, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -192.40542602539062, "logps_train/policy_1_l": -184.91259765625, "logps_train/policy_1_w": -139.0614776611328, "logps_train/policy_2_2": -144.95098876953125, "logps_train/policy_2_w": -193.69281005859375, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.8797695636749268, "rewards_train/1-l": -1.8365726470947266, "rewards_train/1-w": 3.9610400199890137, "rewards_train/2-2": 2.7736520767211914, "rewards_train/2-w": 1.7588443756103516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.79761266708374, "rewards_train/margins_1": 3.081270456314087, "rewards_train/margins_2": 1.0148077011108398, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -198.11557006835938, "logps_train/policy_1_l": -289.897705078125, "logps_train/policy_1_w": -228.68582153320312, "logps_train/policy_2_2": -151.28500366210938, "logps_train/policy_2_w": -300.763671875, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -268.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -316.0, "rewards_train/1-2": 1.8446935415267944, "rewards_train/1-l": -2.6866464614868164, "rewards_train/1-w": 3.9689180850982666, "rewards_train/2-2": 3.2074365615844727, "rewards_train/2-w": 1.5673816204071045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.655564546585083, "rewards_train/margins_1": 2.124224543571472, "rewards_train/margins_2": 1.6400549411773682, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -66.18514251708984, "logps_train/policy_1_l": -146.69322204589844, "logps_train/policy_1_w": -60.0073356628418, "logps_train/policy_2_2": -52.047096252441406, "logps_train/policy_2_w": -83.8409194946289, "logps_train/ref_1_2": -76.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -82.0, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 0.9892985820770264, "rewards_train/1-l": -2.394711971282959, "rewards_train/1-w": 2.2258288860321045, "rewards_train/2-2": 1.589821696281433, "rewards_train/2-w": 1.0815330743789673, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.6205408573150635, "rewards_train/margins_1": 1.2365303039550781, "rewards_train/margins_2": 0.5082886219024658, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -112.60358428955078, "logps_train/policy_1_l": -148.9632568359375, "logps_train/policy_1_w": -80.23638153076172, "logps_train/policy_2_2": -92.4571533203125, "logps_train/policy_2_w": -119.17012786865234, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.6333918571472168, "rewards_train/1-l": -3.039635419845581, "rewards_train/1-w": 3.4576125144958496, "rewards_train/2-2": 2.482409954071045, "rewards_train/2-w": 1.6783000230789185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.497247934341431, "rewards_train/margins_1": 1.8242206573486328, "rewards_train/margins_2": 0.8041099309921265, "step": 631 }, { "epoch": 1.89, "learning_rate": 4.391240385117623e-08, "loss": 0.5067, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -214.12429809570312, "logps_train/policy_1_l": -200.2681427001953, "logps_train/policy_1_w": -205.07858276367188, "logps_train/policy_2_2": -163.1151580810547, "logps_train/policy_2_w": -278.42022705078125, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -296.0, "rewards_train/1-2": 1.8336646556854248, "rewards_train/1-l": -1.9943921566009521, "rewards_train/1-w": 4.2655792236328125, "rewards_train/2-2": 3.4220786094665527, "rewards_train/2-w": 1.6736012697219849, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.259971380233765, "rewards_train/margins_1": 2.4319145679473877, "rewards_train/margins_2": 1.7484773397445679, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -114.02874755859375, "logps_train/policy_1_l": -84.42192077636719, "logps_train/policy_1_w": -85.94400024414062, "logps_train/policy_2_2": -81.72137451171875, "logps_train/policy_2_w": -112.50628662109375, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -69.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 0.5549378991127014, "rewards_train/1-l": -1.4925827980041504, "rewards_train/1-w": 1.7591160535812378, "rewards_train/2-2": 1.98958158493042, "rewards_train/2-w": 0.29468420147895813, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.251698851585388, "rewards_train/margins_1": 1.2041781544685364, "rewards_train/margins_2": 1.6948973834514618, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -116.75999450683594, "logps_train/policy_1_l": -82.08578491210938, "logps_train/policy_1_w": -79.96275329589844, "logps_train/policy_2_2": -90.59258270263672, "logps_train/policy_2_w": -105.01101684570312, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -68.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 0.9193136096000671, "rewards_train/1-l": -1.4055509567260742, "rewards_train/1-w": 1.7394664287567139, "rewards_train/2-2": 2.0641796588897705, "rewards_train/2-w": 0.4617885649204254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.145017385482788, "rewards_train/margins_1": 0.8201528191566467, "rewards_train/margins_2": 1.602391093969345, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -80.00658416748047, "logps_train/policy_1_l": -58.17512893676758, "logps_train/policy_1_w": -47.221038818359375, "logps_train/policy_2_2": -54.24226379394531, "logps_train/policy_2_w": -77.64759826660156, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -41.25, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -72.5, "logps_train/ref_2_w": -83.5, "rewards_train/1-2": 0.5196542739868164, "rewards_train/1-l": -1.674544095993042, "rewards_train/1-w": 1.9599276781082153, "rewards_train/2-2": 1.8187428712844849, "rewards_train/2-w": 0.5703961253166199, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6344717741012573, "rewards_train/margins_1": 1.440273404121399, "rewards_train/margins_2": 1.248346745967865, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -112.7385025024414, "logps_train/policy_1_l": -144.92286682128906, "logps_train/policy_1_w": -145.2460479736328, "logps_train/policy_2_2": -83.543701171875, "logps_train/policy_2_w": -185.81561279296875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": 1.8562281131744385, "rewards_train/1-l": -2.2952170372009277, "rewards_train/1-w": 3.168365478515625, "rewards_train/2-2": 2.7262940406799316, "rewards_train/2-w": 0.960626482963562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.463582515716553, "rewards_train/margins_1": 1.3121373653411865, "rewards_train/margins_2": 1.7656675577163696, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -157.41082763671875, "logps_train/policy_1_l": -269.06390380859375, "logps_train/policy_1_w": -170.88812255859375, "logps_train/policy_2_2": -132.73287963867188, "logps_train/policy_2_w": -212.91525268554688, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.8136045932769775, "rewards_train/1-l": -3.3938913345336914, "rewards_train/1-w": 4.186186790466309, "rewards_train/2-2": 3.461087703704834, "rewards_train/2-w": 2.070974111557007, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.580078125, "rewards_train/margins_1": 1.372582197189331, "rewards_train/margins_2": 1.3901135921478271, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -84.92024230957031, "logps_train/policy_1_l": -91.26870727539062, "logps_train/policy_1_w": -91.73689270019531, "logps_train/policy_2_2": -60.02476501464844, "logps_train/policy_2_w": -126.88336181640625, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -82.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.4579763412475586, "rewards_train/1-l": -1.8378076553344727, "rewards_train/1-w": 2.544279098510742, "rewards_train/2-2": 2.201429605484009, "rewards_train/2-w": 0.9101011753082275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.382086753845215, "rewards_train/margins_1": 1.0863027572631836, "rewards_train/margins_2": 1.2913284301757812, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -236.20977783203125, "logps_train/policy_1_l": -178.1940460205078, "logps_train/policy_1_w": -130.53903198242188, "logps_train/policy_2_2": -172.3437957763672, "logps_train/policy_2_w": -184.61013793945312, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.5102710723876953, "rewards_train/1-l": -2.443916082382202, "rewards_train/1-w": 3.5265650749206543, "rewards_train/2-2": 4.13437032699585, "rewards_train/2-w": 1.9444550275802612, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.9704811573028564, "rewards_train/margins_1": 2.016294002532959, "rewards_train/margins_2": 2.1899152994155884, "step": 632 }, { "epoch": 1.9, "logps_train/policy_1_2": -89.8858642578125, "logps_train/policy_1_l": -89.44512176513672, "logps_train/policy_1_w": -77.62511444091797, "logps_train/policy_2_2": -67.44772338867188, "logps_train/policy_2_w": -110.04670715332031, "logps_train/ref_1_2": -98.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -86.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 0.8629758358001709, "rewards_train/1-l": -1.8022270202636719, "rewards_train/1-w": 2.08841609954834, "rewards_train/2-2": 1.8200719356536865, "rewards_train/2-w": 0.6030440330505371, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8906431198120117, "rewards_train/margins_1": 1.225440263748169, "rewards_train/margins_2": 1.2170279026031494, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -128.1978302001953, "logps_train/policy_1_l": -118.1343765258789, "logps_train/policy_1_w": -85.2503890991211, "logps_train/policy_2_2": -92.87655639648438, "logps_train/policy_2_w": -115.82070922851562, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": 0.8145919442176819, "rewards_train/1-l": -2.2878518104553223, "rewards_train/1-w": 2.403085947036743, "rewards_train/2-2": 2.4967193603515625, "rewards_train/2-w": 1.0934171676635742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.690937757492065, "rewards_train/margins_1": 1.5884940028190613, "rewards_train/margins_2": 1.4033021926879883, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -111.24378967285156, "logps_train/policy_1_l": -190.82345581054688, "logps_train/policy_1_w": -110.18389892578125, "logps_train/policy_2_2": -89.00810241699219, "logps_train/policy_2_w": -140.2928466796875, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.2486681938171387, "rewards_train/1-l": -2.719454288482666, "rewards_train/1-w": 3.0620784759521484, "rewards_train/2-2": 2.1706745624542236, "rewards_train/2-w": 1.7550907135009766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.7815327644348145, "rewards_train/margins_1": 1.8134102821350098, "rewards_train/margins_2": 0.41558384895324707, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -233.89279174804688, "logps_train/policy_1_l": -281.6275634765625, "logps_train/policy_1_w": -153.27972412109375, "logps_train/policy_2_2": -178.80487060546875, "logps_train/policy_2_w": -191.45602416992188, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.5857205390930176, "rewards_train/1-l": -3.8186159133911133, "rewards_train/1-w": 3.4515187740325928, "rewards_train/2-2": 4.300764083862305, "rewards_train/2-w": 2.2106480598449707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.270134687423706, "rewards_train/margins_1": 1.8657982349395752, "rewards_train/margins_2": 2.090116024017334, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -222.49192810058594, "logps_train/policy_1_l": -223.49172973632812, "logps_train/policy_1_w": -137.3248748779297, "logps_train/policy_2_2": -170.28256225585938, "logps_train/policy_2_w": -194.78152465820312, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.296119213104248, "rewards_train/1-l": -1.673782467842102, "rewards_train/1-w": 3.1167306900024414, "rewards_train/2-2": 3.673306941986084, "rewards_train/2-w": 1.504659652709961, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7905131578445435, "rewards_train/margins_1": 1.8206114768981934, "rewards_train/margins_2": 2.168647289276123, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -172.1261749267578, "logps_train/policy_1_l": -183.91348266601562, "logps_train/policy_1_w": -137.6663818359375, "logps_train/policy_2_2": -146.07431030273438, "logps_train/policy_2_w": -176.30447387695312, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 1.829228162765503, "rewards_train/1-l": -1.5483797788619995, "rewards_train/1-w": 3.154162883758545, "rewards_train/2-2": 2.9197168350219727, "rewards_train/2-w": 1.5267783403396606, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.702542662620544, "rewards_train/margins_1": 1.324934720993042, "rewards_train/margins_2": 1.392938494682312, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -217.9417724609375, "logps_train/policy_1_l": -160.08465576171875, "logps_train/policy_1_w": -124.35835266113281, "logps_train/policy_2_2": -170.3416290283203, "logps_train/policy_2_w": -195.3271484375, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.8933234214782715, "rewards_train/1-l": -1.9744806289672852, "rewards_train/1-w": 3.822758197784424, "rewards_train/2-2": 3.623649835586548, "rewards_train/2-w": 1.335645079612732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.797238826751709, "rewards_train/margins_1": 1.9294347763061523, "rewards_train/margins_2": 2.288004755973816, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -167.20843505859375, "logps_train/policy_1_l": -192.68655395507812, "logps_train/policy_1_w": -140.24514770507812, "logps_train/policy_2_2": -127.21049499511719, "logps_train/policy_2_w": -193.96104431152344, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 1.7744693756103516, "rewards_train/1-l": -2.3495144844055176, "rewards_train/1-w": 3.7567360401153564, "rewards_train/2-2": 3.234419107437134, "rewards_train/2-w": 1.7585835456848145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.106250524520874, "rewards_train/margins_1": 1.9822666645050049, "rewards_train/margins_2": 1.4758355617523193, "step": 633 }, { "epoch": 1.9, "learning_rate": 3.942355513100793e-08, "loss": 0.4334, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -27.3631649017334, "logps_train/policy_1_l": -57.74463653564453, "logps_train/policy_1_w": -40.150665283203125, "logps_train/policy_2_2": -17.640239715576172, "logps_train/policy_2_w": -61.840511322021484, "logps_train/ref_1_2": -31.25, "logps_train/ref_1_l": -43.0, "logps_train/ref_1_w": -55.25, "logps_train/ref_2_2": -26.0, "logps_train/ref_2_w": -66.5, "rewards_train/1-2": 0.3985956013202667, "rewards_train/1-l": -1.471583366394043, "rewards_train/1-w": 1.5234098434448242, "rewards_train/2-2": 0.8307026624679565, "rewards_train/2-w": 0.4725894629955292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.994993209838867, "rewards_train/margins_1": 1.1248142421245575, "rewards_train/margins_2": 0.35811319947242737, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -107.65538787841797, "logps_train/policy_1_l": -114.59791564941406, "logps_train/policy_1_w": -76.71109008789062, "logps_train/policy_2_2": -80.55021667480469, "logps_train/policy_2_w": -120.94901275634766, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.0014536380767822, "rewards_train/1-l": -1.6837180852890015, "rewards_train/1-w": 2.3988125324249268, "rewards_train/2-2": 1.656111478805542, "rewards_train/2-w": 0.7777553796768188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.082530617713928, "rewards_train/margins_1": 1.3973588943481445, "rewards_train/margins_2": 0.8783560991287231, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -147.84103393554688, "logps_train/policy_1_l": -212.79490661621094, "logps_train/policy_1_w": -126.61235046386719, "logps_train/policy_2_2": -120.97587585449219, "logps_train/policy_2_w": -159.01327514648438, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 2.2627716064453125, "rewards_train/1-l": -3.9123032093048096, "rewards_train/1-w": 3.157515525817871, "rewards_train/2-2": 3.114912271499634, "rewards_train/2-w": 1.9439862966537476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.069818735122681, "rewards_train/margins_1": 0.8947439193725586, "rewards_train/margins_2": 1.1709259748458862, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -98.50267028808594, "logps_train/policy_1_l": -127.28308868408203, "logps_train/policy_1_w": -82.15995788574219, "logps_train/policy_2_2": -74.6832504272461, "logps_train/policy_2_w": -113.99868774414062, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.532546043395996, "rewards_train/1-l": -1.7089732885360718, "rewards_train/1-w": 2.679316759109497, "rewards_train/2-2": 2.171518325805664, "rewards_train/2-w": 1.6364588737487793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.388290047645569, "rewards_train/margins_1": 1.146770715713501, "rewards_train/margins_2": 0.5350594520568848, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -185.65042114257812, "logps_train/policy_1_l": -129.58599853515625, "logps_train/policy_1_w": -113.48226928710938, "logps_train/policy_2_2": -146.0228271484375, "logps_train/policy_2_w": -143.30734252929688, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.4904284477233887, "rewards_train/1-l": -1.428032636642456, "rewards_train/1-w": 2.9822425842285156, "rewards_train/2-2": 2.874279499053955, "rewards_train/2-w": 1.8708287477493286, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.410275220870972, "rewards_train/margins_1": 1.491814136505127, "rewards_train/margins_2": 1.0034507513046265, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -134.84307861328125, "logps_train/policy_1_l": -128.2726287841797, "logps_train/policy_1_w": -129.55145263671875, "logps_train/policy_2_2": -103.28195190429688, "logps_train/policy_2_w": -172.07913208007812, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.649284839630127, "rewards_train/1-l": -1.2250168323516846, "rewards_train/1-w": 3.2636048793792725, "rewards_train/2-2": 3.0823521614074707, "rewards_train/2-w": 1.3561487197875977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.488621711730957, "rewards_train/margins_1": 1.6143200397491455, "rewards_train/margins_2": 1.726203441619873, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -206.0126495361328, "logps_train/policy_1_l": -143.20359802246094, "logps_train/policy_1_w": -121.49617004394531, "logps_train/policy_2_2": -170.07614135742188, "logps_train/policy_2_w": -152.9060821533203, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.433109998703003, "rewards_train/1-l": -0.9480944871902466, "rewards_train/1-w": 3.4285085201263428, "rewards_train/2-2": 3.031449794769287, "rewards_train/2-w": 2.284390687942505, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.376603007316589, "rewards_train/margins_1": 1.9953985214233398, "rewards_train/margins_2": 0.7470591068267822, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -201.40582275390625, "logps_train/policy_1_l": -187.26809692382812, "logps_train/policy_1_w": -139.23846435546875, "logps_train/policy_2_2": -160.6238250732422, "logps_train/policy_2_w": -186.6653289794922, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.748478651046753, "rewards_train/1-l": -1.534597635269165, "rewards_train/1-w": 3.6636526584625244, "rewards_train/2-2": 3.5610544681549072, "rewards_train/2-w": 2.1240921020507812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1982502937316895, "rewards_train/margins_1": 1.9151740074157715, "rewards_train/margins_2": 1.436962366104126, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -114.10665893554688, "logps_train/policy_1_l": -123.04966735839844, "logps_train/policy_1_w": -78.7307357788086, "logps_train/policy_2_2": -83.83006286621094, "logps_train/policy_2_w": -119.32792663574219, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 1.4158964157104492, "rewards_train/1-l": -1.7618024349212646, "rewards_train/1-w": 2.6347389221191406, "rewards_train/2-2": 2.840430736541748, "rewards_train/2-w": 1.0047078132629395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.396541357040405, "rewards_train/margins_1": 1.2188425064086914, "rewards_train/margins_2": 1.8357229232788086, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -83.39833068847656, "logps_train/policy_1_l": -113.85783386230469, "logps_train/policy_1_w": -54.1783447265625, "logps_train/policy_2_2": -64.70919036865234, "logps_train/policy_2_w": -80.06884765625, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 1.1652456521987915, "rewards_train/1-l": -1.8016037940979004, "rewards_train/1-w": 2.275524616241455, "rewards_train/2-2": 1.849393606185913, "rewards_train/2-w": 1.2001467943191528, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.0771284103393555, "rewards_train/margins_1": 1.1102789640426636, "rewards_train/margins_2": 0.6492468118667603, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -138.1764373779297, "logps_train/policy_1_l": -153.68832397460938, "logps_train/policy_1_w": -99.12120056152344, "logps_train/policy_2_2": -98.35884857177734, "logps_train/policy_2_w": -145.87832641601562, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.7511059045791626, "rewards_train/1-l": -1.897298812866211, "rewards_train/1-w": 3.625770092010498, "rewards_train/2-2": 3.3828649520874023, "rewards_train/2-w": 1.93599534034729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.523068904876709, "rewards_train/margins_1": 1.8746641874313354, "rewards_train/margins_2": 1.4468696117401123, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -178.1556854248047, "logps_train/policy_1_l": -261.12188720703125, "logps_train/policy_1_w": -147.59124755859375, "logps_train/policy_2_2": -142.86532592773438, "logps_train/policy_2_w": -186.85018920898438, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -238.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 2.746932029724121, "rewards_train/1-l": -2.3879690170288086, "rewards_train/1-w": 3.5236873626708984, "rewards_train/2-2": 3.7790918350219727, "rewards_train/2-w": 2.343104839324951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.911656379699707, "rewards_train/margins_1": 0.7767553329467773, "rewards_train/margins_2": 1.4359869956970215, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -128.25071716308594, "logps_train/policy_1_l": -165.32716369628906, "logps_train/policy_1_w": -72.209716796875, "logps_train/policy_2_2": -102.84486389160156, "logps_train/policy_2_w": -101.64063262939453, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.7483652830123901, "rewards_train/1-l": -2.8437271118164062, "rewards_train/1-w": 2.8149662017822266, "rewards_train/2-2": 2.7537946701049805, "rewards_train/2-w": 1.5859371423721313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.658693313598633, "rewards_train/margins_1": 1.0666009187698364, "rewards_train/margins_2": 1.1678575277328491, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -256.8731689453125, "logps_train/policy_1_l": -278.0226135253906, "logps_train/policy_1_w": -192.44854736328125, "logps_train/policy_2_2": -218.58189392089844, "logps_train/policy_2_w": -242.40113830566406, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -253.0, "logps_train/ref_1_w": -234.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.497056484222412, "rewards_train/1-l": -2.5296051502227783, "rewards_train/1-w": 4.195771217346191, "rewards_train/2-2": 4.291810989379883, "rewards_train/2-w": 2.567697048187256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.72537636756897, "rewards_train/margins_1": 1.6987147331237793, "rewards_train/margins_2": 1.724113941192627, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -109.86621856689453, "logps_train/policy_1_l": -138.53955078125, "logps_train/policy_1_w": -75.11994934082031, "logps_train/policy_2_2": -83.19392395019531, "logps_train/policy_2_w": -109.50747680664062, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.904003381729126, "rewards_train/1-l": -1.8148936033248901, "rewards_train/1-w": 2.9989428520202637, "rewards_train/2-2": 2.846623420715332, "rewards_train/2-w": 1.6836268901824951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.813836455345154, "rewards_train/margins_1": 1.0949394702911377, "rewards_train/margins_2": 1.162996530532837, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -158.96246337890625, "logps_train/policy_1_l": -262.61712646484375, "logps_train/policy_1_w": -148.71011352539062, "logps_train/policy_2_2": -129.79592895507812, "logps_train/policy_2_w": -182.69427490234375, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.9385191202163696, "rewards_train/1-l": -3.153902769088745, "rewards_train/1-w": 3.4743010997772217, "rewards_train/2-2": 2.9372026920318604, "rewards_train/2-w": 1.9446353912353516, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.628203868865967, "rewards_train/margins_1": 1.535781979560852, "rewards_train/margins_2": 0.9925673007965088, "step": 635 }, { "epoch": 1.9, "learning_rate": 3.517485433412987e-08, "loss": 0.3596, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -88.37060546875, "logps_train/policy_1_l": -103.55070495605469, "logps_train/policy_1_w": -98.14512634277344, "logps_train/policy_2_2": -66.61894989013672, "logps_train/policy_2_w": -125.48992919921875, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.2441890239715576, "rewards_train/1-l": -1.9177230596542358, "rewards_train/1-w": 2.270839214324951, "rewards_train/2-2": 2.0425732135772705, "rewards_train/2-w": 0.8744449019432068, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.188562273979187, "rewards_train/margins_1": 1.0266501903533936, "rewards_train/margins_2": 1.1681283116340637, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -241.1581573486328, "logps_train/policy_1_l": -239.4161376953125, "logps_train/policy_1_w": -115.44783020019531, "logps_train/policy_2_2": -193.1842041015625, "logps_train/policy_2_w": -146.59268188476562, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -235.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": 1.7169955968856812, "rewards_train/1-l": -3.610363006591797, "rewards_train/1-w": 2.8559980392456055, "rewards_train/2-2": 4.167516708374023, "rewards_train/2-w": 2.033308982849121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.466361045837402, "rewards_train/margins_1": 1.1390024423599243, "rewards_train/margins_2": 2.1342077255249023, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -136.245361328125, "logps_train/policy_1_l": -84.72916412353516, "logps_train/policy_1_w": -49.00122833251953, "logps_train/policy_2_2": -103.83921813964844, "logps_train/policy_2_w": -76.72073364257812, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -63.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.4957759380340576, "rewards_train/1-l": -2.1149086952209473, "rewards_train/1-w": 1.7725337743759155, "rewards_train/2-2": 2.2129528522491455, "rewards_train/2-w": 0.3410128355026245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.887442469596863, "rewards_train/margins_1": 1.276757836341858, "rewards_train/margins_2": 1.871940016746521, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -150.84051513671875, "logps_train/policy_1_l": -144.3109588623047, "logps_train/policy_1_w": -87.7735595703125, "logps_train/policy_2_2": -105.29003143310547, "logps_train/policy_2_w": -129.03208923339844, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.29563570022583, "rewards_train/1-l": -2.841691732406616, "rewards_train/1-w": 3.5710811614990234, "rewards_train/2-2": 2.805372476577759, "rewards_train/2-w": 2.1280415058135986, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.41277289390564, "rewards_train/margins_1": 2.2754454612731934, "rewards_train/margins_2": 0.6773309707641602, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -214.53134155273438, "logps_train/policy_1_l": -236.2427520751953, "logps_train/policy_1_w": -191.7013397216797, "logps_train/policy_2_2": -182.09486389160156, "logps_train/policy_2_w": -246.2575225830078, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -237.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": 2.5812416076660156, "rewards_train/1-l": -3.121150016784668, "rewards_train/1-w": 4.5298662185668945, "rewards_train/2-2": 4.11238956451416, "rewards_train/2-w": 2.2492477893829346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.6510162353515625, "rewards_train/margins_1": 1.948624610900879, "rewards_train/margins_2": 1.8631417751312256, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -147.2220001220703, "logps_train/policy_1_l": -158.42941284179688, "logps_train/policy_1_w": -103.39045715332031, "logps_train/policy_2_2": -104.17036437988281, "logps_train/policy_2_w": -145.5727081298828, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.3746756315231323, "rewards_train/1-l": -1.542158842086792, "rewards_train/1-w": 2.9312667846679688, "rewards_train/2-2": 3.0064005851745605, "rewards_train/2-w": 1.4521042108535767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.473425626754761, "rewards_train/margins_1": 1.5565911531448364, "rewards_train/margins_2": 1.5542963743209839, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -200.14920043945312, "logps_train/policy_1_l": -285.3940124511719, "logps_train/policy_1_w": -170.66656494140625, "logps_train/policy_2_2": -155.17605590820312, "logps_train/policy_2_w": -215.22610473632812, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -250.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.5995326042175293, "rewards_train/1-l": -3.52065110206604, "rewards_train/1-w": 3.7302191257476807, "rewards_train/2-2": 3.204270839691162, "rewards_train/2-w": 1.87738835811615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.250870227813721, "rewards_train/margins_1": 2.1306865215301514, "rewards_train/margins_2": 1.3268824815750122, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -121.67333221435547, "logps_train/policy_1_l": -134.55665588378906, "logps_train/policy_1_w": -76.27531433105469, "logps_train/policy_2_2": -99.64456176757812, "logps_train/policy_2_w": -107.6907730102539, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 1.5865733623504639, "rewards_train/1-l": -1.1709003448486328, "rewards_train/1-w": 2.5326249599456787, "rewards_train/2-2": 2.6835904121398926, "rewards_train/2-w": 1.7340474128723145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7035253047943115, "rewards_train/margins_1": 0.9460515975952148, "rewards_train/margins_2": 0.9495429992675781, "step": 636 }, { "epoch": 1.91, "logps_train/policy_1_2": -152.25100708007812, "logps_train/policy_1_l": -167.44924926757812, "logps_train/policy_1_w": -142.9122772216797, "logps_train/policy_2_2": -116.8148422241211, "logps_train/policy_2_w": -187.47071838378906, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.448337435722351, "rewards_train/1-l": -1.2533247470855713, "rewards_train/1-w": 3.236116647720337, "rewards_train/2-2": 2.9357035160064697, "rewards_train/2-w": 1.2044906616210938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.489441394805908, "rewards_train/margins_1": 1.7877792119979858, "rewards_train/margins_2": 1.731212854385376, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -151.37445068359375, "logps_train/policy_1_l": -154.9640655517578, "logps_train/policy_1_w": -122.8110580444336, "logps_train/policy_2_2": -115.83220672607422, "logps_train/policy_2_w": -162.52935791015625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.59224271774292, "rewards_train/1-l": -2.1206254959106445, "rewards_train/1-w": 3.489987373352051, "rewards_train/2-2": 3.027716636657715, "rewards_train/2-w": 1.70097017288208, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.610612869262695, "rewards_train/margins_1": 1.8977446556091309, "rewards_train/margins_2": 1.3267464637756348, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -106.02755737304688, "logps_train/policy_1_l": -195.79568481445312, "logps_train/policy_1_w": -159.3857421875, "logps_train/policy_2_2": -82.5587158203125, "logps_train/policy_2_w": -216.09390258789062, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": 1.6043739318847656, "rewards_train/1-l": -2.357693910598755, "rewards_train/1-w": 3.207519054412842, "rewards_train/2-2": 2.394714832305908, "rewards_train/2-w": 1.090611219406128, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.565212965011597, "rewards_train/margins_1": 1.6031451225280762, "rewards_train/margins_2": 1.3041036128997803, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -147.89219665527344, "logps_train/policy_1_l": -168.20034790039062, "logps_train/policy_1_w": -105.39337158203125, "logps_train/policy_2_2": -114.38184356689453, "logps_train/policy_2_w": -131.72274780273438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.4620497226715088, "rewards_train/1-l": -2.665639638900757, "rewards_train/1-w": 2.9692564010620117, "rewards_train/2-2": 3.0344717502593994, "rewards_train/2-w": 1.7558506727218628, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.6348960399627686, "rewards_train/margins_1": 1.507206678390503, "rewards_train/margins_2": 1.2786210775375366, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -179.22042846679688, "logps_train/policy_1_l": -147.3942108154297, "logps_train/policy_1_w": -90.05368041992188, "logps_train/policy_2_2": -143.45828247070312, "logps_train/policy_2_w": -101.62659454345703, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.8892860412597656, "rewards_train/1-l": -1.7116867303848267, "rewards_train/1-w": 2.2704129219055176, "rewards_train/2-2": 2.5775842666625977, "rewards_train/2-w": 1.918566346168518, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9820996522903442, "rewards_train/margins_1": 1.381126880645752, "rewards_train/margins_2": 0.6590179204940796, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -134.4587860107422, "logps_train/policy_1_l": -221.51681518554688, "logps_train/policy_1_w": -185.9549102783203, "logps_train/policy_2_2": -109.4493408203125, "logps_train/policy_2_w": -232.44581604003906, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.007246255874634, "rewards_train/1-l": -1.9958217144012451, "rewards_train/1-w": 3.967008590698242, "rewards_train/2-2": 2.6003785133361816, "rewards_train/2-w": 1.642917275428772, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.962830305099487, "rewards_train/margins_1": 1.9597623348236084, "rewards_train/margins_2": 0.9574612379074097, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -126.48937225341797, "logps_train/policy_1_l": -177.699951171875, "logps_train/policy_1_w": -62.36576461791992, "logps_train/policy_2_2": -107.79193878173828, "logps_train/policy_2_w": -81.01435852050781, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -96.0, "rewards_train/1-2": 1.4260632991790771, "rewards_train/1-l": -1.9793710708618164, "rewards_train/1-w": 2.058345317840576, "rewards_train/2-2": 2.0297908782958984, "rewards_train/2-w": 1.5331346988677979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.037716388702393, "rewards_train/margins_1": 0.632282018661499, "rewards_train/margins_2": 0.4966561794281006, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -151.44776916503906, "logps_train/policy_1_l": -132.53892517089844, "logps_train/policy_1_w": -100.48440551757812, "logps_train/policy_2_2": -97.4390869140625, "logps_train/policy_2_w": -143.74652099609375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.6270973682403564, "rewards_train/1-l": -2.0929059982299805, "rewards_train/1-w": 2.7710914611816406, "rewards_train/2-2": 2.9205446243286133, "rewards_train/2-w": 1.0698792934417725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.863997459411621, "rewards_train/margins_1": 2.143994092941284, "rewards_train/margins_2": 1.8506653308868408, "step": 637 }, { "epoch": 1.91, "learning_rate": 3.1166716126249664e-08, "loss": 0.3515, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -147.25233459472656, "logps_train/policy_1_l": -222.4931640625, "logps_train/policy_1_w": -97.61100769042969, "logps_train/policy_2_2": -114.05643463134766, "logps_train/policy_2_w": -120.14430236816406, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 1.548205018043518, "rewards_train/1-l": -2.8586926460266113, "rewards_train/1-w": 2.893587112426758, "rewards_train/2-2": 2.6756064891815186, "rewards_train/2-w": 2.1933822631835938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.752279758453369, "rewards_train/margins_1": 1.3453820943832397, "rewards_train/margins_2": 0.4822242259979248, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -124.43020629882812, "logps_train/policy_1_l": -127.68183898925781, "logps_train/policy_1_w": -70.7213134765625, "logps_train/policy_2_2": -93.27837371826172, "logps_train/policy_2_w": -90.83377075195312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.569478988647461, "rewards_train/1-l": -1.8611526489257812, "rewards_train/1-w": 2.9825563430786133, "rewards_train/2-2": 2.9541940689086914, "rewards_train/2-w": 2.025216817855835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.8437089920043945, "rewards_train/margins_1": 1.4130773544311523, "rewards_train/margins_2": 0.9289772510528564, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -161.68777465820312, "logps_train/policy_1_l": -172.12490844726562, "logps_train/policy_1_w": -104.21907043457031, "logps_train/policy_2_2": -129.8152313232422, "logps_train/policy_2_w": -138.7332000732422, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.4788780212402344, "rewards_train/1-l": -2.737295150756836, "rewards_train/1-w": 2.7365407943725586, "rewards_train/2-2": 2.9466018676757812, "rewards_train/2-w": 1.4661332368850708, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4738359451293945, "rewards_train/margins_1": 1.2576627731323242, "rewards_train/margins_2": 1.4804686307907104, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -151.7332305908203, "logps_train/policy_1_l": -204.374755859375, "logps_train/policy_1_w": -104.60001373291016, "logps_train/policy_2_2": -114.2110595703125, "logps_train/policy_2_w": -134.39454650878906, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 1.7001149654388428, "rewards_train/1-l": -2.320678949356079, "rewards_train/1-w": 3.100935935974121, "rewards_train/2-2": 3.05545711517334, "rewards_train/2-w": 2.065232753753662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.4216148853302, "rewards_train/margins_1": 1.4008209705352783, "rewards_train/margins_2": 0.9902243614196777, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -86.0445785522461, "logps_train/policy_1_l": -73.37901306152344, "logps_train/policy_1_w": -50.0209846496582, "logps_train/policy_2_2": -54.854896545410156, "logps_train/policy_2_w": -73.25184631347656, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -51.25, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -84.0, "rewards_train/1-2": 1.03773033618927, "rewards_train/1-l": -2.2080187797546387, "rewards_train/1-w": 2.1369643211364746, "rewards_train/2-2": 2.1184167861938477, "rewards_train/2-w": 1.0541119575500488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.344983100891113, "rewards_train/margins_1": 1.0992339849472046, "rewards_train/margins_2": 1.0643048286437988, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -174.73193359375, "logps_train/policy_1_l": -138.41262817382812, "logps_train/policy_1_w": -104.41972351074219, "logps_train/policy_2_2": -137.95399475097656, "logps_train/policy_2_w": -149.18763732910156, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0121588706970215, "rewards_train/1-l": -2.293478012084961, "rewards_train/1-w": 2.8733596801757812, "rewards_train/2-2": 2.727964162826538, "rewards_train/2-w": 0.8538931608200073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.166837692260742, "rewards_train/margins_1": 1.8612008094787598, "rewards_train/margins_2": 1.8740710020065308, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -116.13663482666016, "logps_train/policy_1_l": -134.12611389160156, "logps_train/policy_1_w": -79.53955841064453, "logps_train/policy_2_2": -85.13019561767578, "logps_train/policy_2_w": -123.02920532226562, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.331648588180542, "rewards_train/1-l": -2.2333145141601562, "rewards_train/1-w": 2.8476064205169678, "rewards_train/2-2": 2.2608091831207275, "rewards_train/2-w": 0.8142671585083008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.080920934677124, "rewards_train/margins_1": 1.5159578323364258, "rewards_train/margins_2": 1.4465420246124268, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -169.12786865234375, "logps_train/policy_1_l": -146.5323028564453, "logps_train/policy_1_w": -137.96090698242188, "logps_train/policy_2_2": -126.32548522949219, "logps_train/policy_2_w": -176.846923828125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.3926832675933838, "rewards_train/1-l": -1.435652256011963, "rewards_train/1-w": 3.03047251701355, "rewards_train/2-2": 3.2551462650299072, "rewards_train/2-w": 1.068432092666626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.466124773025513, "rewards_train/margins_1": 1.637789249420166, "rewards_train/margins_2": 2.1867141723632812, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -222.7039031982422, "logps_train/policy_1_l": -167.03787231445312, "logps_train/policy_1_w": -126.5237808227539, "logps_train/policy_2_2": -182.96055603027344, "logps_train/policy_2_w": -170.42999267578125, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.273359775543213, "rewards_train/1-l": -1.3756614923477173, "rewards_train/1-w": 3.528872013092041, "rewards_train/2-2": 2.8156633377075195, "rewards_train/2-w": 2.035125494003296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.904533505439758, "rewards_train/margins_1": 2.255512237548828, "rewards_train/margins_2": 0.7805378437042236, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -161.83209228515625, "logps_train/policy_1_l": -133.26959228515625, "logps_train/policy_1_w": -128.8816375732422, "logps_train/policy_2_2": -129.0213165283203, "logps_train/policy_2_w": -168.0933380126953, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.3433526754379272, "rewards_train/1-l": -1.9714910984039307, "rewards_train/1-w": 3.181367874145508, "rewards_train/2-2": 2.722477436065674, "rewards_train/2-w": 1.5891039371490479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1528589725494385, "rewards_train/margins_1": 1.8380151987075806, "rewards_train/margins_2": 1.133373498916626, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -174.41738891601562, "logps_train/policy_1_l": -165.68382263183594, "logps_train/policy_1_w": -146.03564453125, "logps_train/policy_2_2": -126.69815063476562, "logps_train/policy_2_w": -182.24851989746094, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.675058126449585, "rewards_train/1-l": -1.268479824066162, "rewards_train/1-w": 3.4019031524658203, "rewards_train/2-2": 3.1387786865234375, "rewards_train/2-w": 1.9212417602539062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.670382976531982, "rewards_train/margins_1": 1.7268450260162354, "rewards_train/margins_2": 1.2175369262695312, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -124.40092468261719, "logps_train/policy_1_l": -165.18157958984375, "logps_train/policy_1_w": -85.65714263916016, "logps_train/policy_2_2": -100.51138305664062, "logps_train/policy_2_w": -108.35137939453125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": 1.4458447694778442, "rewards_train/1-l": -2.068939685821533, "rewards_train/1-w": 2.414754629135132, "rewards_train/2-2": 2.6238620281219482, "rewards_train/2-w": 1.6136904954910278, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.483694314956665, "rewards_train/margins_1": 0.9689098596572876, "rewards_train/margins_2": 1.0101715326309204, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -117.42153930664062, "logps_train/policy_1_l": -100.88198852539062, "logps_train/policy_1_w": -105.47476196289062, "logps_train/policy_2_2": -97.90188598632812, "logps_train/policy_2_w": -139.42724609375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.6523771286010742, "rewards_train/1-l": -1.5702298879623413, "rewards_train/1-w": 3.010519504547119, "rewards_train/2-2": 2.540670871734619, "rewards_train/2-w": 1.3070803880691528, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5807493925094604, "rewards_train/margins_1": 1.358142375946045, "rewards_train/margins_2": 1.2335904836654663, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -133.28305053710938, "logps_train/policy_1_l": -133.19784545898438, "logps_train/policy_1_w": -114.77804565429688, "logps_train/policy_2_2": -99.58740234375, "logps_train/policy_2_w": -155.07931518554688, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 1.986929178237915, "rewards_train/1-l": -2.045077085494995, "rewards_train/1-w": 3.871804714202881, "rewards_train/2-2": 3.2701656818389893, "rewards_train/2-w": 1.9670697450637817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.916881799697876, "rewards_train/margins_1": 1.8848755359649658, "rewards_train/margins_2": 1.3030959367752075, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -159.29051208496094, "logps_train/policy_1_l": -125.3125, "logps_train/policy_1_w": -93.19107055664062, "logps_train/policy_2_2": -119.76112365722656, "logps_train/policy_2_w": -112.32623291015625, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.2303240299224854, "rewards_train/1-l": -1.6221189498901367, "rewards_train/1-w": 2.314877510070801, "rewards_train/2-2": 2.6254498958587646, "rewards_train/2-w": 1.4122984409332275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9369964599609375, "rewards_train/margins_1": 1.0845534801483154, "rewards_train/margins_2": 1.213151454925537, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -60.512596130371094, "logps_train/policy_1_l": -87.34318542480469, "logps_train/policy_1_w": -82.68097686767578, "logps_train/policy_2_2": -46.636070251464844, "logps_train/policy_2_w": -104.09562683105469, "logps_train/ref_1_2": -69.5, "logps_train/ref_1_l": -72.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -59.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.8985450267791748, "rewards_train/1-l": -1.5632247924804688, "rewards_train/1-w": 2.4229185581207275, "rewards_train/2-2": 1.2781896591186523, "rewards_train/2-w": 1.390437364578247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.9861433506011963, "rewards_train/margins_1": 1.5243735313415527, "rewards_train/margins_2": -0.11224770545959473, "step": 639 }, { "epoch": 1.92, "learning_rate": 2.739953169458992e-08, "loss": 0.4149, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -97.36763000488281, "logps_train/policy_1_l": -123.02333068847656, "logps_train/policy_1_w": -57.953697204589844, "logps_train/policy_2_2": -67.81135559082031, "logps_train/policy_2_w": -80.9320068359375, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -76.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 0.6859913468360901, "rewards_train/1-l": -2.3390519618988037, "rewards_train/1-w": 1.8163490295410156, "rewards_train/2-2": 2.0482590198516846, "rewards_train/2-w": 0.9308224320411682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.155400991439819, "rewards_train/margins_1": 1.1303576827049255, "rewards_train/margins_2": 1.1174365878105164, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -220.56312561035156, "logps_train/policy_1_l": -186.7561798095703, "logps_train/policy_1_w": -164.92251586914062, "logps_train/policy_2_2": -178.39987182617188, "logps_train/policy_2_w": -210.8415985107422, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 2.49212384223938, "rewards_train/1-l": -2.1394848823547363, "rewards_train/1-w": 4.031186580657959, "rewards_train/2-2": 4.195949554443359, "rewards_train/2-w": 2.4470901489257812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.170671463012695, "rewards_train/margins_1": 1.539062738418579, "rewards_train/margins_2": 1.7488594055175781, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -58.17744445800781, "logps_train/policy_1_l": -101.08062744140625, "logps_train/policy_1_w": -101.26591491699219, "logps_train/policy_2_2": -46.639793395996094, "logps_train/policy_2_w": -137.2989959716797, "logps_train/ref_1_2": -63.0, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -57.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": 0.49827125668525696, "rewards_train/1-l": -1.8730034828186035, "rewards_train/1-w": 2.405634880065918, "rewards_train/2-2": 1.032797932624817, "rewards_train/2-w": 0.5348461866378784, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.2786383628845215, "rewards_train/margins_1": 1.907363623380661, "rewards_train/margins_2": 0.4979517459869385, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -136.85934448242188, "logps_train/policy_1_l": -133.17193603515625, "logps_train/policy_1_w": -94.36158752441406, "logps_train/policy_2_2": -101.99197387695312, "logps_train/policy_2_w": -149.07431030273438, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.5150411128997803, "rewards_train/1-l": -1.5529606342315674, "rewards_train/1-w": 2.4023170471191406, "rewards_train/2-2": 2.023068428039551, "rewards_train/2-w": 0.40624120831489563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.955277681350708, "rewards_train/margins_1": 1.8872759342193604, "rewards_train/margins_2": 1.6168272197246552, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -124.03889465332031, "logps_train/policy_1_l": -184.6988525390625, "logps_train/policy_1_w": -79.97930145263672, "logps_train/policy_2_2": -98.29315185546875, "logps_train/policy_2_w": -106.59246063232422, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.611735224723816, "rewards_train/1-l": -2.56011962890625, "rewards_train/1-w": 2.301532506942749, "rewards_train/2-2": 2.795685291290283, "rewards_train/2-w": 1.538703203201294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.861652135848999, "rewards_train/margins_1": 0.6897972822189331, "rewards_train/margins_2": 1.2569820880889893, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -143.37875366210938, "logps_train/policy_1_l": -139.6202850341797, "logps_train/policy_1_w": -134.80665588378906, "logps_train/policy_2_2": -126.4869384765625, "logps_train/policy_2_w": -160.70111083984375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.055873394012451, "rewards_train/1-l": -1.460953950881958, "rewards_train/1-w": 2.7961905002593994, "rewards_train/2-2": 2.3138067722320557, "rewards_train/2-w": 1.446685552597046, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.257144451141357, "rewards_train/margins_1": 0.7403171062469482, "rewards_train/margins_2": 0.8671212196350098, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -155.78382873535156, "logps_train/policy_1_l": -152.2896728515625, "logps_train/policy_1_w": -104.07987213134766, "logps_train/policy_2_2": -119.50518798828125, "logps_train/policy_2_w": -146.6829376220703, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.0138046741485596, "rewards_train/1-l": -1.9789671897888184, "rewards_train/1-w": 2.9998254776000977, "rewards_train/2-2": 2.265105724334717, "rewards_train/2-w": 1.187955379486084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.978792667388916, "rewards_train/margins_1": 1.986020803451538, "rewards_train/margins_2": 1.0771503448486328, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -235.07928466796875, "logps_train/policy_1_l": -200.9307861328125, "logps_train/policy_1_w": -185.92971801757812, "logps_train/policy_2_2": -200.87315368652344, "logps_train/policy_2_w": -221.76239013671875, "logps_train/ref_1_2": -254.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": 1.9194153547286987, "rewards_train/1-l": -1.885266900062561, "rewards_train/1-w": 3.1242165565490723, "rewards_train/2-2": 3.5548722743988037, "rewards_train/2-w": 1.637823224067688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.009483456611633, "rewards_train/margins_1": 1.2048012018203735, "rewards_train/margins_2": 1.9170490503311157, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -106.07398986816406, "logps_train/policy_1_l": -69.21955108642578, "logps_train/policy_1_w": -67.60120391845703, "logps_train/policy_2_2": -74.80232238769531, "logps_train/policy_2_w": -97.77767181396484, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -57.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.6511949896812439, "rewards_train/1-l": -1.1825997829437256, "rewards_train/1-w": 2.4375357627868652, "rewards_train/2-2": 2.0928149223327637, "rewards_train/2-w": 1.1589515209197998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.620135545730591, "rewards_train/margins_1": 1.7863407731056213, "rewards_train/margins_2": 0.9338634014129639, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -172.44216918945312, "logps_train/policy_1_l": -184.05490112304688, "logps_train/policy_1_w": -91.65478515625, "logps_train/policy_2_2": -131.9942626953125, "logps_train/policy_2_w": -120.65499877929688, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.1284395456314087, "rewards_train/1-l": -2.652365207672119, "rewards_train/1-w": 2.8782715797424316, "rewards_train/2-2": 2.8829946517944336, "rewards_train/2-w": 2.07200026512146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.530636787414551, "rewards_train/margins_1": 1.749832034111023, "rewards_train/margins_2": 0.8109943866729736, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -141.8012237548828, "logps_train/policy_1_l": -128.3117218017578, "logps_train/policy_1_w": -137.66238403320312, "logps_train/policy_2_2": -107.0872802734375, "logps_train/policy_2_w": -182.0775146484375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.1905803680419922, "rewards_train/1-l": -2.169844627380371, "rewards_train/1-w": 3.2044639587402344, "rewards_train/2-2": 2.5072875022888184, "rewards_train/2-w": 1.2141236066818237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.3743085861206055, "rewards_train/margins_1": 2.013883590698242, "rewards_train/margins_2": 1.2931638956069946, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -145.30645751953125, "logps_train/policy_1_l": -207.44998168945312, "logps_train/policy_1_w": -110.03453063964844, "logps_train/policy_2_2": -117.0454330444336, "logps_train/policy_2_w": -141.54501342773438, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.8052916526794434, "rewards_train/1-l": -2.489920139312744, "rewards_train/1-w": 3.6266255378723145, "rewards_train/2-2": 2.840769052505493, "rewards_train/2-w": 2.6408114433288574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.116545677185059, "rewards_train/margins_1": 1.821333885192871, "rewards_train/margins_2": 0.19995760917663574, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -49.97249221801758, "logps_train/policy_1_l": -118.17121887207031, "logps_train/policy_1_w": -62.401817321777344, "logps_train/policy_2_2": -38.59853744506836, "logps_train/policy_2_w": -82.39501953125, "logps_train/ref_1_2": -54.5, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -47.0, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": 0.4473796486854553, "rewards_train/1-l": -1.5061845779418945, "rewards_train/1-w": 1.8266150951385498, "rewards_train/2-2": 0.835752010345459, "rewards_train/2-w": 0.9476072788238525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 3.3327996730804443, "rewards_train/margins_1": 1.3792354464530945, "rewards_train/margins_2": -0.11185526847839355, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -122.59808349609375, "logps_train/policy_1_l": -145.05453491210938, "logps_train/policy_1_w": -68.76579284667969, "logps_train/policy_2_2": -97.11082458496094, "logps_train/policy_2_w": -96.29371643066406, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.7532768845558167, "rewards_train/1-l": -1.9968594312667847, "rewards_train/1-w": 2.1127758026123047, "rewards_train/2-2": 2.008643627166748, "rewards_train/2-w": 1.2764872312545776, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.109635233879089, "rewards_train/margins_1": 1.359498918056488, "rewards_train/margins_2": 0.7321563959121704, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -53.052345275878906, "logps_train/policy_1_l": -100.08325958251953, "logps_train/policy_1_w": -59.68143081665039, "logps_train/policy_2_2": -36.39389419555664, "logps_train/policy_2_w": -88.79838562011719, "logps_train/ref_1_2": -63.5, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -78.0, "logps_train/ref_2_2": -52.0, "logps_train/ref_2_w": -97.0, "rewards_train/1-2": 1.0665425062179565, "rewards_train/1-l": -1.6576423645019531, "rewards_train/1-w": 1.8541224002838135, "rewards_train/2-2": 1.5446927547454834, "rewards_train/2-w": 0.8393020629882812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.5117647647857666, "rewards_train/margins_1": 0.7875798940658569, "rewards_train/margins_2": 0.7053906917572021, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -200.18267822265625, "logps_train/policy_1_l": -174.56521606445312, "logps_train/policy_1_w": -112.61199188232422, "logps_train/policy_2_2": -153.1429443359375, "logps_train/policy_2_w": -157.2906494140625, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 2.2129807472229004, "rewards_train/1-l": -1.6229275465011597, "rewards_train/1-w": 3.3169257640838623, "rewards_train/2-2": 4.14664363861084, "rewards_train/2-w": 1.5240598917007446, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.939853310585022, "rewards_train/margins_1": 1.103945016860962, "rewards_train/margins_2": 2.622583746910095, "step": 641 }, { "epoch": 1.92, "learning_rate": 2.387366870971103e-08, "loss": 0.5118, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -86.40453338623047, "logps_train/policy_1_l": -197.3296661376953, "logps_train/policy_1_w": -85.35527038574219, "logps_train/policy_2_2": -72.4442138671875, "logps_train/policy_2_w": -104.62247467041016, "logps_train/ref_1_2": -103.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.6657967567443848, "rewards_train/1-l": -3.388434886932373, "rewards_train/1-w": 3.344160318374634, "rewards_train/2-2": 2.179015874862671, "rewards_train/2-w": 2.6182217597961426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.732595205307007, "rewards_train/margins_1": 1.678363561630249, "rewards_train/margins_2": -0.4392058849334717, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -128.42349243164062, "logps_train/policy_1_l": -130.3753662109375, "logps_train/policy_1_w": -71.67807006835938, "logps_train/policy_2_2": -99.17120361328125, "logps_train/policy_2_w": -98.35943603515625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.7295259237289429, "rewards_train/1-l": -2.3262076377868652, "rewards_train/1-w": 2.495473861694336, "rewards_train/2-2": 2.8570985794067383, "rewards_train/2-w": 1.590619444847107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.821681499481201, "rewards_train/margins_1": 0.7659479379653931, "rewards_train/margins_2": 1.2664791345596313, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -106.25306701660156, "logps_train/policy_1_l": -130.0896453857422, "logps_train/policy_1_w": -66.93446350097656, "logps_train/policy_2_2": -76.71688842773438, "logps_train/policy_2_w": -89.06678771972656, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 1.3793816566467285, "rewards_train/1-l": -2.2550580501556396, "rewards_train/1-w": 2.163194417953491, "rewards_train/2-2": 2.491201639175415, "rewards_train/2-w": 0.9161727428436279, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.418252468109131, "rewards_train/margins_1": 0.7838127613067627, "rewards_train/margins_2": 1.575028896331787, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -190.439453125, "logps_train/policy_1_l": -124.13114929199219, "logps_train/policy_1_w": -162.53338623046875, "logps_train/policy_2_2": -136.27671813964844, "logps_train/policy_2_w": -209.6558837890625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 0.341991662979126, "rewards_train/1-l": -1.7147140502929688, "rewards_train/1-w": 3.0517385005950928, "rewards_train/2-2": 2.7387351989746094, "rewards_train/2-w": 0.9922236204147339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.7664525508880615, "rewards_train/margins_1": 2.709746837615967, "rewards_train/margins_2": 1.7465115785598755, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -72.86644744873047, "logps_train/policy_1_l": -90.30770874023438, "logps_train/policy_1_w": -67.30587768554688, "logps_train/policy_2_2": -48.72592544555664, "logps_train/policy_2_w": -103.21458435058594, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -71.0, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.2086677551269531, "rewards_train/1-l": -1.2969818115234375, "rewards_train/1-w": 2.544412612915039, "rewards_train/2-2": 2.2516262531280518, "rewards_train/2-w": 1.4879164695739746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8413944244384766, "rewards_train/margins_1": 1.335744857788086, "rewards_train/margins_2": 0.7637097835540771, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -191.3597412109375, "logps_train/policy_1_l": -206.64187622070312, "logps_train/policy_1_w": -146.97219848632812, "logps_train/policy_2_2": -167.17022705078125, "logps_train/policy_2_w": -185.84536743164062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 2.324965000152588, "rewards_train/1-l": -3.7759056091308594, "rewards_train/1-w": 3.617232322692871, "rewards_train/2-2": 3.2337584495544434, "rewards_train/2-w": 1.9998371601104736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.3931379318237305, "rewards_train/margins_1": 1.2922673225402832, "rewards_train/margins_2": 1.2339212894439697, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -62.432464599609375, "logps_train/policy_1_l": -81.91079711914062, "logps_train/policy_1_w": -37.925804138183594, "logps_train/policy_2_2": -47.14773941040039, "logps_train/policy_2_w": -62.3663444519043, "logps_train/ref_1_2": -75.0, "logps_train/ref_1_l": -65.5, "logps_train/ref_1_w": -59.0, "logps_train/ref_2_2": -66.0, "logps_train/ref_2_w": -73.0, "rewards_train/1-2": 1.2278475761413574, "rewards_train/1-l": -1.6264317035675049, "rewards_train/1-w": 2.1273417472839355, "rewards_train/2-2": 1.8805384635925293, "rewards_train/2-w": 1.0981311798095703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7537734508514404, "rewards_train/margins_1": 0.8994941711425781, "rewards_train/margins_2": 0.782407283782959, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -138.0433807373047, "logps_train/policy_1_l": -193.12823486328125, "logps_train/policy_1_w": -139.78672790527344, "logps_train/policy_2_2": -105.48800659179688, "logps_train/policy_2_w": -190.3951873779297, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 1.7644128799438477, "rewards_train/1-l": -2.1121163368225098, "rewards_train/1-w": 3.225234270095825, "rewards_train/2-2": 3.0605738162994385, "rewards_train/2-w": 1.388020634651184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.337350606918335, "rewards_train/margins_1": 1.4608213901519775, "rewards_train/margins_2": 1.6725531816482544, "step": 642 }, { "epoch": 1.93, "logps_train/policy_1_2": -119.60208129882812, "logps_train/policy_1_l": -83.34210968017578, "logps_train/policy_1_w": -70.71143341064453, "logps_train/policy_2_2": -75.85366821289062, "logps_train/policy_2_w": -105.21351623535156, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 0.7522919178009033, "rewards_train/1-l": -1.0732738971710205, "rewards_train/1-w": 2.6335442066192627, "rewards_train/2-2": 2.2771332263946533, "rewards_train/2-w": 1.244273066520691, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.706818103790283, "rewards_train/margins_1": 1.8812522888183594, "rewards_train/margins_2": 1.0328601598739624, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -122.88517761230469, "logps_train/policy_1_l": -158.60650634765625, "logps_train/policy_1_w": -154.426025390625, "logps_train/policy_2_2": -107.22953796386719, "logps_train/policy_2_w": -188.4263916015625, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 2.2450757026672363, "rewards_train/1-l": -1.3442435264587402, "rewards_train/1-w": 3.4542715549468994, "rewards_train/2-2": 2.744429111480713, "rewards_train/2-w": 2.2989635467529297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.79851508140564, "rewards_train/margins_1": 1.209195852279663, "rewards_train/margins_2": 0.4454655647277832, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -156.97756958007812, "logps_train/policy_1_l": -216.89913940429688, "logps_train/policy_1_w": -128.08619689941406, "logps_train/policy_2_2": -132.27276611328125, "logps_train/policy_2_w": -157.5906982421875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 2.1553688049316406, "rewards_train/1-l": -2.1174521446228027, "rewards_train/1-w": 3.3921618461608887, "rewards_train/2-2": 3.3508477210998535, "rewards_train/2-w": 2.2440547943115234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.509613990783691, "rewards_train/margins_1": 1.236793041229248, "rewards_train/margins_2": 1.10679292678833, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -173.8727264404297, "logps_train/policy_1_l": -209.06015014648438, "logps_train/policy_1_w": -137.03689575195312, "logps_train/policy_2_2": -132.3687744140625, "logps_train/policy_2_w": -187.18142700195312, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.9049155712127686, "rewards_train/1-l": -2.691953182220459, "rewards_train/1-w": 3.4939658641815186, "rewards_train/2-2": 3.4443717002868652, "rewards_train/2-w": 1.4474818706512451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1859190464019775, "rewards_train/margins_1": 1.58905029296875, "rewards_train/margins_2": 1.9968898296356201, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -201.96104431152344, "logps_train/policy_1_l": -200.15887451171875, "logps_train/policy_1_w": -199.66195678710938, "logps_train/policy_2_2": -162.94158935546875, "logps_train/policy_2_w": -262.04852294921875, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -280.0, "rewards_train/1-2": 1.9570196866989136, "rewards_train/1-l": -2.0440120697021484, "rewards_train/1-w": 4.746304512023926, "rewards_train/2-2": 3.437091827392578, "rewards_train/2-w": 1.8826497793197632, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.790316581726074, "rewards_train/margins_1": 2.789284825325012, "rewards_train/margins_2": 1.554442048072815, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -123.78173065185547, "logps_train/policy_1_l": -102.66284942626953, "logps_train/policy_1_w": -61.34001922607422, "logps_train/policy_2_2": -79.68413543701172, "logps_train/policy_2_w": -102.05825805664062, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 1.0237802267074585, "rewards_train/1-l": -2.189136505126953, "rewards_train/1-w": 2.8417792320251465, "rewards_train/2-2": 2.4739692211151123, "rewards_train/2-w": 1.233236312866211, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0309157371521, "rewards_train/margins_1": 1.817999005317688, "rewards_train/margins_2": 1.2407329082489014, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -109.95877075195312, "logps_train/policy_1_l": -121.0322036743164, "logps_train/policy_1_w": -78.96800994873047, "logps_train/policy_2_2": -84.56423950195312, "logps_train/policy_2_w": -110.64741516113281, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 1.3220919370651245, "rewards_train/1-l": -1.6176729202270508, "rewards_train/1-w": 2.6079843044281006, "rewards_train/2-2": 2.401388168334961, "rewards_train/2-w": 1.0889695882797241, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.225657224655151, "rewards_train/margins_1": 1.285892367362976, "rewards_train/margins_2": 1.3124185800552368, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -89.22200012207031, "logps_train/policy_1_l": -121.12620544433594, "logps_train/policy_1_w": -89.9715576171875, "logps_train/policy_2_2": -67.04605102539062, "logps_train/policy_2_w": -121.40028381347656, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -113.5, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 1.1770185232162476, "rewards_train/1-l": -3.0313711166381836, "rewards_train/1-w": 2.3270628452301025, "rewards_train/2-2": 2.044222831726074, "rewards_train/2-w": 0.6959087252616882, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.358433961868286, "rewards_train/margins_1": 1.150044322013855, "rewards_train/margins_2": 1.348314106464386, "step": 643 }, { "epoch": 1.93, "learning_rate": 2.0589471289624018e-08, "loss": 0.4363, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -170.65707397460938, "logps_train/policy_1_l": -139.55169677734375, "logps_train/policy_1_w": -135.8038330078125, "logps_train/policy_2_2": -132.29620361328125, "logps_train/policy_2_w": -169.84506225585938, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": 1.6499173641204834, "rewards_train/1-l": -0.9668893814086914, "rewards_train/1-w": 3.0539913177490234, "rewards_train/2-2": 3.008662223815918, "rewards_train/2-w": 1.4904930591583252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.020880699157715, "rewards_train/margins_1": 1.40407395362854, "rewards_train/margins_2": 1.5181691646575928, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -144.80816650390625, "logps_train/policy_1_l": -135.6998748779297, "logps_train/policy_1_w": -112.13311767578125, "logps_train/policy_2_2": -120.48523712158203, "logps_train/policy_2_w": -141.75103759765625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.9121519327163696, "rewards_train/1-l": -1.4479174613952637, "rewards_train/1-w": 2.9609079360961914, "rewards_train/2-2": 2.891319751739502, "rewards_train/2-w": 1.8209909200668335, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.408825397491455, "rewards_train/margins_1": 1.0487560033798218, "rewards_train/margins_2": 1.0703288316726685, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -225.7598876953125, "logps_train/policy_1_l": -204.02064514160156, "logps_train/policy_1_w": -183.6875762939453, "logps_train/policy_2_2": -176.362548828125, "logps_train/policy_2_w": -245.44589233398438, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -230.0, "logps_train/ref_2_2": -226.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.4990110397338867, "rewards_train/1-l": -2.2098779678344727, "rewards_train/1-w": 4.656242847442627, "rewards_train/2-2": 4.913744926452637, "rewards_train/2-w": 2.146035671234131, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.8661208152771, "rewards_train/margins_1": 2.1572318077087402, "rewards_train/margins_2": 2.767709255218506, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -177.8260955810547, "logps_train/policy_1_l": -124.55391693115234, "logps_train/policy_1_w": -97.87689208984375, "logps_train/policy_2_2": -136.7974853515625, "logps_train/policy_2_w": -129.64393615722656, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.5416098833084106, "rewards_train/1-l": -1.333266258239746, "rewards_train/1-w": 3.1129517555236816, "rewards_train/2-2": 3.440563201904297, "rewards_train/2-w": 1.8625593185424805, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.446218013763428, "rewards_train/margins_1": 1.571341872215271, "rewards_train/margins_2": 1.5780038833618164, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -154.09823608398438, "logps_train/policy_1_l": -197.98995971679688, "logps_train/policy_1_w": -122.34426879882812, "logps_train/policy_2_2": -132.3384552001953, "logps_train/policy_2_w": -140.96519470214844, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.7813878059387207, "rewards_train/1-l": -1.9501678943634033, "rewards_train/1-w": 3.0686984062194824, "rewards_train/2-2": 2.618497610092163, "rewards_train/2-w": 2.1991829872131348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.018866300582886, "rewards_train/margins_1": 1.2873106002807617, "rewards_train/margins_2": 0.4193146228790283, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -172.76507568359375, "logps_train/policy_1_l": -191.20443725585938, "logps_train/policy_1_w": -109.22156524658203, "logps_train/policy_2_2": -144.3198699951172, "logps_train/policy_2_w": -140.9721221923828, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.7375543117523193, "rewards_train/1-l": -2.6954426765441895, "rewards_train/1-w": 2.902844190597534, "rewards_train/2-2": 2.7547318935394287, "rewards_train/2-w": 1.7293498516082764, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.598286867141724, "rewards_train/margins_1": 1.1652898788452148, "rewards_train/margins_2": 1.0253820419311523, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -135.70718383789062, "logps_train/policy_1_l": -154.5533905029297, "logps_train/policy_1_w": -155.81814575195312, "logps_train/policy_2_2": -108.03244018554688, "logps_train/policy_2_w": -187.1685028076172, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 1.781233787536621, "rewards_train/1-l": -2.025261402130127, "rewards_train/1-w": 3.671311855316162, "rewards_train/2-2": 2.878592014312744, "rewards_train/2-w": 2.100336790084839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.696573257446289, "rewards_train/margins_1": 1.890078067779541, "rewards_train/margins_2": 0.7782552242279053, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -127.55693054199219, "logps_train/policy_1_l": -164.48153686523438, "logps_train/policy_1_w": -84.4454345703125, "logps_train/policy_2_2": -94.666748046875, "logps_train/policy_2_w": -116.57613372802734, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 1.314620018005371, "rewards_train/1-l": -2.986043930053711, "rewards_train/1-w": 2.7839722633361816, "rewards_train/2-2": 2.6012940406799316, "rewards_train/2-w": 1.442777156829834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.770016193389893, "rewards_train/margins_1": 1.4693522453308105, "rewards_train/margins_2": 1.1585168838500977, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -232.00311279296875, "logps_train/policy_1_l": -186.05044555664062, "logps_train/policy_1_w": -127.38420104980469, "logps_train/policy_2_2": -187.85098266601562, "logps_train/policy_2_w": -167.473876953125, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 2.102814197540283, "rewards_train/1-l": -2.0112953186035156, "rewards_train/1-w": 3.9959542751312256, "rewards_train/2-2": 4.028963088989258, "rewards_train/2-w": 2.552611827850342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.007249593734741, "rewards_train/margins_1": 1.8931400775909424, "rewards_train/margins_2": 1.476351261138916, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -100.21946716308594, "logps_train/policy_1_l": -140.4569854736328, "logps_train/policy_1_w": -73.03309631347656, "logps_train/policy_2_2": -58.592864990234375, "logps_train/policy_2_w": -104.00682067871094, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -80.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 1.1436786651611328, "rewards_train/1-l": -1.044429063796997, "rewards_train/1-w": 2.4922962188720703, "rewards_train/2-2": 2.1285557746887207, "rewards_train/2-w": 1.108302354812622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5367252826690674, "rewards_train/margins_1": 1.3486175537109375, "rewards_train/margins_2": 1.0202534198760986, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -129.80117797851562, "logps_train/policy_1_l": -117.61298370361328, "logps_train/policy_1_w": -60.9620475769043, "logps_train/policy_2_2": -92.76366424560547, "logps_train/policy_2_w": -101.3751220703125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": 0.7878503799438477, "rewards_train/1-l": -1.4363961219787598, "rewards_train/1-w": 2.4030139446258545, "rewards_train/2-2": 2.553321361541748, "rewards_train/2-w": 1.0984243154525757, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8394100666046143, "rewards_train/margins_1": 1.6151635646820068, "rewards_train/margins_2": 1.4548970460891724, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -139.7142791748047, "logps_train/policy_1_l": -109.46533203125, "logps_train/policy_1_w": -82.84088134765625, "logps_train/policy_2_2": -103.29930877685547, "logps_train/policy_2_w": -107.74856567382812, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.9441977143287659, "rewards_train/1-l": -2.1082520484924316, "rewards_train/1-w": 2.294037103652954, "rewards_train/2-2": 2.493506908416748, "rewards_train/2-w": 1.240768551826477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.402289152145386, "rewards_train/margins_1": 1.3498393893241882, "rewards_train/margins_2": 1.252738356590271, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -213.62017822265625, "logps_train/policy_1_l": -158.45388793945312, "logps_train/policy_1_w": -176.86526489257812, "logps_train/policy_2_2": -162.6456298828125, "logps_train/policy_2_w": -245.51333618164062, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -229.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.0129804611206055, "rewards_train/1-l": -2.1522247791290283, "rewards_train/1-w": 5.2244110107421875, "rewards_train/2-2": 4.094812870025635, "rewards_train/2-w": 2.6471030712127686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.376635789871216, "rewards_train/margins_1": 3.211430549621582, "rewards_train/margins_2": 1.4477097988128662, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -190.7552947998047, "logps_train/policy_1_l": -181.5325164794922, "logps_train/policy_1_w": -109.79676818847656, "logps_train/policy_2_2": -150.31936645507812, "logps_train/policy_2_w": -145.331787109375, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.2205640077590942, "rewards_train/1-l": -2.865750789642334, "rewards_train/1-w": 2.938096523284912, "rewards_train/2-2": 3.1116185188293457, "rewards_train/2-w": 1.7359614372253418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.803847312927246, "rewards_train/margins_1": 1.7175325155258179, "rewards_train/margins_2": 1.375657081604004, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -168.60801696777344, "logps_train/policy_1_l": -142.71484375, "logps_train/policy_1_w": -88.26629638671875, "logps_train/policy_2_2": -122.87284851074219, "logps_train/policy_2_w": -114.76992797851562, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 0.9501346349716187, "rewards_train/1-l": -2.501171112060547, "rewards_train/1-w": 2.8389949798583984, "rewards_train/2-2": 3.322871208190918, "rewards_train/2-w": 1.4378505945205688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.340166091918945, "rewards_train/margins_1": 1.8888603448867798, "rewards_train/margins_2": 1.8850206136703491, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -168.40097045898438, "logps_train/policy_1_l": -149.94212341308594, "logps_train/policy_1_w": -113.18904113769531, "logps_train/policy_2_2": -128.95062255859375, "logps_train/policy_2_w": -145.60845947265625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.1489651203155518, "rewards_train/1-l": -1.8916726112365723, "rewards_train/1-w": 2.3023853302001953, "rewards_train/2-2": 3.03071928024292, "rewards_train/2-w": 1.359857201576233, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.194057941436768, "rewards_train/margins_1": 1.1534202098846436, "rewards_train/margins_2": 1.670862078666687, "step": 645 }, { "epoch": 1.93, "learning_rate": 1.7547259966207708e-08, "loss": 0.3649, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -130.77487182617188, "logps_train/policy_1_l": -154.51809692382812, "logps_train/policy_1_w": -72.25383758544922, "logps_train/policy_2_2": -94.13075256347656, "logps_train/policy_2_w": -105.93914794921875, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -94.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.744387686252594, "rewards_train/1-l": -2.352200984954834, "rewards_train/1-w": 2.231647491455078, "rewards_train/2-2": 2.2111434936523438, "rewards_train/2-w": 0.8404600620269775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.583848476409912, "rewards_train/margins_1": 1.4872598052024841, "rewards_train/margins_2": 1.3706834316253662, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -188.84426879882812, "logps_train/policy_1_l": -185.2759552001953, "logps_train/policy_1_w": -123.03960418701172, "logps_train/policy_2_2": -137.21189880371094, "logps_train/policy_2_w": -177.9261474609375, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 1.3530738353729248, "rewards_train/1-l": -1.979548692703247, "rewards_train/1-w": 3.2554144859313965, "rewards_train/2-2": 2.6944358348846436, "rewards_train/2-w": 1.338634967803955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.2349631786346436, "rewards_train/margins_1": 1.9023406505584717, "rewards_train/margins_2": 1.3558008670806885, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -188.006103515625, "logps_train/policy_1_l": -209.09828186035156, "logps_train/policy_1_w": -172.80735778808594, "logps_train/policy_2_2": -156.53890991210938, "logps_train/policy_2_w": -228.1656494140625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -216.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -252.0, "rewards_train/1-2": 2.7806406021118164, "rewards_train/1-l": -2.9426417350769043, "rewards_train/1-w": 4.343482971191406, "rewards_train/2-2": 3.8617334365844727, "rewards_train/2-w": 2.324841260910034, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.2861247062683105, "rewards_train/margins_1": 1.5628423690795898, "rewards_train/margins_2": 1.5368921756744385, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -174.5133056640625, "logps_train/policy_1_l": -160.4144287109375, "logps_train/policy_1_w": -173.22047424316406, "logps_train/policy_2_2": -138.40318298339844, "logps_train/policy_2_w": -211.46453857421875, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": 1.7111694812774658, "rewards_train/1-l": -1.1547231674194336, "rewards_train/1-w": 3.5162336826324463, "rewards_train/2-2": 3.4503071308135986, "rewards_train/2-w": 1.837920904159546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.67095685005188, "rewards_train/margins_1": 1.8050642013549805, "rewards_train/margins_2": 1.6123862266540527, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -193.34133911132812, "logps_train/policy_1_l": -232.28759765625, "logps_train/policy_1_w": -167.81320190429688, "logps_train/policy_2_2": -136.91412353515625, "logps_train/policy_2_w": -222.99826049804688, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 2.2221152782440186, "rewards_train/1-l": -2.461963176727295, "rewards_train/1-w": 4.237429141998291, "rewards_train/2-2": 3.8398382663726807, "rewards_train/2-w": 1.956425428390503, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.699392318725586, "rewards_train/margins_1": 2.0153138637542725, "rewards_train/margins_2": 1.8834128379821777, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -187.67462158203125, "logps_train/policy_1_l": -164.48312377929688, "logps_train/policy_1_w": -100.0921630859375, "logps_train/policy_2_2": -144.11441040039062, "logps_train/policy_2_w": -121.8536376953125, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.7325377464294434, "rewards_train/1-l": -2.878000497817993, "rewards_train/1-w": 3.631409168243408, "rewards_train/2-2": 3.786997079849243, "rewards_train/2-w": 2.5865116119384766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.509409666061401, "rewards_train/margins_1": 1.8988714218139648, "rewards_train/margins_2": 1.2004854679107666, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -158.37448120117188, "logps_train/policy_1_l": -116.89488983154297, "logps_train/policy_1_w": -135.019287109375, "logps_train/policy_2_2": -114.55146789550781, "logps_train/policy_2_w": -184.0933837890625, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.0891141891479492, "rewards_train/1-l": -0.6397821307182312, "rewards_train/1-w": 2.978637218475342, "rewards_train/2-2": 2.9296178817749023, "rewards_train/2-w": 0.9539442658424377, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.618419349193573, "rewards_train/margins_1": 1.8895230293273926, "rewards_train/margins_2": 1.9756736159324646, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -188.9337921142578, "logps_train/policy_1_l": -167.00582885742188, "logps_train/policy_1_w": -107.33192443847656, "logps_train/policy_2_2": -142.15850830078125, "logps_train/policy_2_w": -149.44906616210938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.4456831216812134, "rewards_train/1-l": -2.815427303314209, "rewards_train/1-w": 2.9980580806732178, "rewards_train/2-2": 3.3747739791870117, "rewards_train/2-w": 1.5550925731658936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.813485383987427, "rewards_train/margins_1": 1.5523749589920044, "rewards_train/margins_2": 1.8196814060211182, "step": 646 }, { "epoch": 1.94, "logps_train/policy_1_2": -107.96297454833984, "logps_train/policy_1_l": -98.14161682128906, "logps_train/policy_1_w": -63.34571075439453, "logps_train/policy_2_2": -79.78585052490234, "logps_train/policy_2_w": -91.05299377441406, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -107.0, "rewards_train/1-2": 1.1113197803497314, "rewards_train/1-l": -1.1517716646194458, "rewards_train/1-w": 2.2224602699279785, "rewards_train/2-2": 2.2087202072143555, "rewards_train/2-w": 1.6314201354980469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3742319345474243, "rewards_train/margins_1": 1.111140489578247, "rewards_train/margins_2": 0.5773000717163086, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -158.0961456298828, "logps_train/policy_1_l": -187.97219848632812, "logps_train/policy_1_w": -103.83384704589844, "logps_train/policy_2_2": -128.70135498046875, "logps_train/policy_2_w": -132.00001525878906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.6060104370117188, "rewards_train/1-l": -1.96128249168396, "rewards_train/1-w": 2.6041154861450195, "rewards_train/2-2": 2.804863929748535, "rewards_train/2-w": 1.7787091732025146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5653979778289795, "rewards_train/margins_1": 0.9981050491333008, "rewards_train/margins_2": 1.0261547565460205, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -142.30368041992188, "logps_train/policy_1_l": -113.52364349365234, "logps_train/policy_1_w": -151.51254272460938, "logps_train/policy_2_2": -119.84504699707031, "logps_train/policy_2_w": -180.09457397460938, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 1.4805703163146973, "rewards_train/1-l": -0.9211142659187317, "rewards_train/1-w": 2.659684181213379, "rewards_train/2-2": 2.3006515502929688, "rewards_train/2-w": 1.6858547925949097, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5807984471321106, "rewards_train/margins_1": 1.1791138648986816, "rewards_train/margins_2": 0.6147967576980591, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -172.8590087890625, "logps_train/policy_1_l": -192.47171020507812, "logps_train/policy_1_w": -112.75523376464844, "logps_train/policy_2_2": -143.04222106933594, "logps_train/policy_2_w": -147.2086181640625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.7672240734100342, "rewards_train/1-l": -2.1120152473449707, "rewards_train/1-w": 3.205726146697998, "rewards_train/2-2": 2.9879655838012695, "rewards_train/2-w": 2.069762706756592, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.317741394042969, "rewards_train/margins_1": 1.4385020732879639, "rewards_train/margins_2": 0.9182028770446777, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -165.6593475341797, "logps_train/policy_1_l": -187.90792846679688, "logps_train/policy_1_w": -149.51480102539062, "logps_train/policy_2_2": -146.53662109375, "logps_train/policy_2_w": -192.66220092773438, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 2.6067214012145996, "rewards_train/1-l": -1.244698405265808, "rewards_train/1-w": 3.302034854888916, "rewards_train/2-2": 3.317430257797241, "rewards_train/2-w": 2.316202163696289, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.546733260154724, "rewards_train/margins_1": 0.6953134536743164, "rewards_train/margins_2": 1.0012280941009521, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -129.97463989257812, "logps_train/policy_1_l": -149.03431701660156, "logps_train/policy_1_w": -76.13773345947266, "logps_train/policy_2_2": -100.44508361816406, "logps_train/policy_2_w": -102.27994537353516, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.0095678567886353, "rewards_train/1-l": -2.1006968021392822, "rewards_train/1-w": 2.2026331424713135, "rewards_train/2-2": 2.2363507747650146, "rewards_train/2-w": 1.3368492126464844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.303329944610596, "rewards_train/margins_1": 1.1930652856826782, "rewards_train/margins_2": 0.8995015621185303, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -51.21775817871094, "logps_train/policy_1_l": -35.43443298339844, "logps_train/policy_1_w": -23.481273651123047, "logps_train/policy_2_2": -41.77745819091797, "logps_train/policy_2_w": -37.55608367919922, "logps_train/ref_1_2": -59.0, "logps_train/ref_1_l": -20.875, "logps_train/ref_1_w": -32.75, "logps_train/ref_2_2": -54.5, "logps_train/ref_2_w": -42.25, "rewards_train/1-2": 0.7793960571289062, "rewards_train/1-l": -1.456333875656128, "rewards_train/1-w": 0.9354662895202637, "rewards_train/2-2": 1.2644416093826294, "rewards_train/2-w": 0.46548497676849365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3918001651763916, "rewards_train/margins_1": 0.15607023239135742, "rewards_train/margins_2": 0.7989566326141357, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -99.95802307128906, "logps_train/policy_1_l": -116.02426147460938, "logps_train/policy_1_w": -68.15580749511719, "logps_train/policy_2_2": -71.53189086914062, "logps_train/policy_2_w": -95.40802764892578, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": 1.3963855504989624, "rewards_train/1-l": -2.3041844367980957, "rewards_train/1-w": 1.5981885194778442, "rewards_train/2-2": 2.021029472351074, "rewards_train/2-w": 0.6882986426353455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.90237295627594, "rewards_train/margins_1": 0.20180296897888184, "rewards_train/margins_2": 1.3327308297157288, "step": 647 }, { "epoch": 1.94, "learning_rate": 1.4747331653923725e-08, "loss": 0.4052, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -102.03266906738281, "logps_train/policy_1_l": -103.1142578125, "logps_train/policy_1_w": -59.82027816772461, "logps_train/policy_2_2": -68.77865600585938, "logps_train/policy_2_w": -104.26080322265625, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 0.9357955455780029, "rewards_train/1-l": -1.7954097986221313, "rewards_train/1-w": 2.4968783855438232, "rewards_train/2-2": 1.994180679321289, "rewards_train/2-w": 0.9407162666320801, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.292288184165955, "rewards_train/margins_1": 1.5610828399658203, "rewards_train/margins_2": 1.053464412689209, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -243.97787475585938, "logps_train/policy_1_l": -216.2845458984375, "logps_train/policy_1_w": -145.04425048828125, "logps_train/policy_2_2": -191.5703125, "logps_train/policy_2_w": -180.16952514648438, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -229.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.5053378343582153, "rewards_train/1-l": -2.6932990550994873, "rewards_train/1-w": 3.8346383571624756, "rewards_train/2-2": 3.721095085144043, "rewards_train/2-w": 2.268986701965332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.527937412261963, "rewards_train/margins_1": 2.3293005228042603, "rewards_train/margins_2": 1.452108383178711, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -220.0938720703125, "logps_train/policy_1_l": -249.0979461669922, "logps_train/policy_1_w": -165.5026092529297, "logps_train/policy_2_2": -172.14791870117188, "logps_train/policy_2_w": -229.79876708984375, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 2.021862506866455, "rewards_train/1-l": -2.4295215606689453, "rewards_train/1-w": 4.151301383972168, "rewards_train/2-2": 4.027395248413086, "rewards_train/2-w": 2.4013731479644775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.580822944641113, "rewards_train/margins_1": 2.129438877105713, "rewards_train/margins_2": 1.6260221004486084, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -116.10778045654297, "logps_train/policy_1_l": -250.06224060058594, "logps_train/policy_1_w": -123.93233489990234, "logps_train/policy_2_2": -90.88286590576172, "logps_train/policy_2_w": -152.69265747070312, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -220.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.3341439962387085, "rewards_train/1-l": -2.952610492706299, "rewards_train/1-w": 2.758328914642334, "rewards_train/2-2": 2.044135093688965, "rewards_train/2-w": 1.4838600158691406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.710939407348633, "rewards_train/margins_1": 1.4241849184036255, "rewards_train/margins_2": 0.5602750778198242, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -231.1431427001953, "logps_train/policy_1_l": -194.15390014648438, "logps_train/policy_1_w": -153.6070556640625, "logps_train/policy_2_2": -188.46923828125, "logps_train/policy_2_w": -181.90249633789062, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 1.8825603723526, "rewards_train/1-l": -2.4901957511901855, "rewards_train/1-w": 3.3424196243286133, "rewards_train/2-2": 3.8983876705169678, "rewards_train/2-w": 2.325376272201538, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.832615375518799, "rewards_train/margins_1": 1.4598592519760132, "rewards_train/margins_2": 1.5730113983154297, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -147.0977783203125, "logps_train/policy_1_l": -108.0850830078125, "logps_train/policy_1_w": -103.91816711425781, "logps_train/policy_2_2": -111.13580322265625, "logps_train/policy_2_w": -133.45632934570312, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": 1.2261593341827393, "rewards_train/1-l": -1.367297887802124, "rewards_train/1-w": 2.961308717727661, "rewards_train/2-2": 2.727043867111206, "rewards_train/2-w": 1.7606176137924194, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.328606605529785, "rewards_train/margins_1": 1.7351493835449219, "rewards_train/margins_2": 0.9664262533187866, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -124.39100646972656, "logps_train/policy_1_l": -116.82215881347656, "logps_train/policy_1_w": -97.22894287109375, "logps_train/policy_2_2": -94.78408813476562, "logps_train/policy_2_w": -137.79542541503906, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -96.5, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.3030871152877808, "rewards_train/1-l": -2.027723789215088, "rewards_train/1-w": 2.994293212890625, "rewards_train/2-2": 2.116903305053711, "rewards_train/2-w": 1.388426661491394, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.022017002105713, "rewards_train/margins_1": 1.6912060976028442, "rewards_train/margins_2": 0.7284766435623169, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -195.49551391601562, "logps_train/policy_1_l": -222.03550720214844, "logps_train/policy_1_w": -111.52711486816406, "logps_train/policy_2_2": -148.1370849609375, "logps_train/policy_2_w": -160.53298950195312, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.6363863945007324, "rewards_train/1-l": -3.2587761878967285, "rewards_train/1-w": 3.6101794242858887, "rewards_train/2-2": 3.4948863983154297, "rewards_train/2-w": 1.7842001914978027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.868955612182617, "rewards_train/margins_1": 1.9737930297851562, "rewards_train/margins_2": 1.710686206817627, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -154.961669921875, "logps_train/policy_1_l": -108.57498168945312, "logps_train/policy_1_w": -113.30670166015625, "logps_train/policy_2_2": -123.42652893066406, "logps_train/policy_2_w": -146.63192749023438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.9288334846496582, "rewards_train/1-l": -1.2778104543685913, "rewards_train/1-w": 2.9794864654541016, "rewards_train/2-2": 3.0698468685150146, "rewards_train/2-w": 1.6172747611999512, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.257296919822693, "rewards_train/margins_1": 1.0506529808044434, "rewards_train/margins_2": 1.4525721073150635, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -188.63858032226562, "logps_train/policy_1_l": -172.41522216796875, "logps_train/policy_1_w": -163.1934814453125, "logps_train/policy_2_2": -137.96798706054688, "logps_train/policy_2_w": -227.505126953125, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 1.6595795154571533, "rewards_train/1-l": -2.417694568634033, "rewards_train/1-w": 4.133777141571045, "rewards_train/2-2": 3.1860134601593018, "rewards_train/2-w": 1.6416760683059692, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.551471710205078, "rewards_train/margins_1": 2.4741976261138916, "rewards_train/margins_2": 1.5443373918533325, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -167.94215393066406, "logps_train/policy_1_l": -174.29354858398438, "logps_train/policy_1_w": -95.36184692382812, "logps_train/policy_2_2": -132.77493286132812, "logps_train/policy_2_w": -127.65217590332031, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 2.343675374984741, "rewards_train/1-l": -1.6895115375518799, "rewards_train/1-w": 3.399752140045166, "rewards_train/2-2": 3.826021671295166, "rewards_train/2-w": 2.4004077911376953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.089263677597046, "rewards_train/margins_1": 1.0560767650604248, "rewards_train/margins_2": 1.4256138801574707, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -96.05411529541016, "logps_train/policy_1_l": -157.94345092773438, "logps_train/policy_1_w": -106.01553344726562, "logps_train/policy_2_2": -69.68939208984375, "logps_train/policy_2_w": -135.71470642089844, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 1.6195884943008423, "rewards_train/1-l": -2.27657151222229, "rewards_train/1-w": 3.0906341075897217, "rewards_train/2-2": 2.8146543502807617, "rewards_train/2-w": 1.5972785949707031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.367205619812012, "rewards_train/margins_1": 1.4710456132888794, "rewards_train/margins_2": 1.2173757553100586, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -122.02313995361328, "logps_train/policy_1_l": -97.35713195800781, "logps_train/policy_1_w": -78.89368438720703, "logps_train/policy_2_2": -92.29161071777344, "logps_train/policy_2_w": -112.51669311523438, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -84.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.0609674453735352, "rewards_train/1-l": -1.3548543453216553, "rewards_train/1-w": 2.4106314182281494, "rewards_train/2-2": 2.2770886421203613, "rewards_train/2-w": 1.069814682006836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7654857635498047, "rewards_train/margins_1": 1.3496639728546143, "rewards_train/margins_2": 1.2072739601135254, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -182.90841674804688, "logps_train/policy_1_l": -127.7140121459961, "logps_train/policy_1_w": -85.47620391845703, "logps_train/policy_2_2": -142.53884887695312, "logps_train/policy_2_w": -116.14912414550781, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 1.94822096824646, "rewards_train/1-l": -1.9536278247833252, "rewards_train/1-w": 2.7656607627868652, "rewards_train/2-2": 3.5101778507232666, "rewards_train/2-w": 1.988603115081787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.71928858757019, "rewards_train/margins_1": 0.8174397945404053, "rewards_train/margins_2": 1.5215747356414795, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -262.8653869628906, "logps_train/policy_1_l": -162.27963256835938, "logps_train/policy_1_w": -182.82345581054688, "logps_train/policy_2_2": -200.84176635742188, "logps_train/policy_2_w": -231.11270141601562, "logps_train/ref_1_2": -286.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -245.0, "logps_train/ref_2_w": -256.0, "rewards_train/1-2": 2.2697112560272217, "rewards_train/1-l": -1.2991549968719482, "rewards_train/1-w": 4.342655181884766, "rewards_train/2-2": 4.365823745727539, "rewards_train/2-w": 2.4699795246124268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.641810178756714, "rewards_train/margins_1": 2.072943925857544, "rewards_train/margins_2": 1.8958442211151123, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -64.95468139648438, "logps_train/policy_1_l": -97.66752624511719, "logps_train/policy_1_w": -34.06982421875, "logps_train/policy_2_2": -48.22755432128906, "logps_train/policy_2_w": -46.23906707763672, "logps_train/ref_1_2": -78.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -48.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 1.275625467300415, "rewards_train/1-l": -2.6497597694396973, "rewards_train/1-w": 1.4609864950180054, "rewards_train/2-2": 2.056931972503662, "rewards_train/2-w": 0.8565618991851807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.110746264457703, "rewards_train/margins_1": 0.18536102771759033, "rewards_train/margins_2": 1.2003700733184814, "step": 649 }, { "epoch": 1.95, "learning_rate": 1.2189959620839686e-08, "loss": 0.3464, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -123.48889923095703, "logps_train/policy_1_l": -171.569580078125, "logps_train/policy_1_w": -113.00135803222656, "logps_train/policy_2_2": -92.2658920288086, "logps_train/policy_2_w": -154.23577880859375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.040954351425171, "rewards_train/1-l": -2.4057867527008057, "rewards_train/1-w": 2.359825372695923, "rewards_train/2-2": 1.996067762374878, "rewards_train/2-w": 0.820171058177948, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7656121253967285, "rewards_train/margins_1": 1.318871021270752, "rewards_train/margins_2": 1.17589670419693, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -139.72674560546875, "logps_train/policy_1_l": -170.64968872070312, "logps_train/policy_1_w": -110.4404296875, "logps_train/policy_2_2": -113.58029174804688, "logps_train/policy_2_w": -136.3159942626953, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 1.7913885116577148, "rewards_train/1-l": -3.082937717437744, "rewards_train/1-w": 2.9793941974639893, "rewards_train/2-2": 2.9241976737976074, "rewards_train/2-w": 1.653557538986206, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.062331914901733, "rewards_train/margins_1": 1.1880056858062744, "rewards_train/margins_2": 1.2706401348114014, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -138.68295288085938, "logps_train/policy_1_l": -149.50550842285156, "logps_train/policy_1_w": -107.48960876464844, "logps_train/policy_2_2": -106.62751770019531, "logps_train/policy_2_w": -139.71151733398438, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 1.4449868202209473, "rewards_train/1-l": -2.503675937652588, "rewards_train/1-w": 2.7234997749328613, "rewards_train/2-2": 3.0325610637664795, "rewards_train/2-w": 1.4936909675598145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.227175712585449, "rewards_train/margins_1": 1.278512954711914, "rewards_train/margins_2": 1.538870096206665, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -140.12322998046875, "logps_train/policy_1_l": -125.21061706542969, "logps_train/policy_1_w": -126.64691162109375, "logps_train/policy_2_2": -101.9041519165039, "logps_train/policy_2_w": -167.4463348388672, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.1283023357391357, "rewards_train/1-l": -2.3655924797058105, "rewards_train/1-w": 3.0325748920440674, "rewards_train/2-2": 2.625600814819336, "rewards_train/2-w": 1.2643500566482544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.398167371749878, "rewards_train/margins_1": 1.9042725563049316, "rewards_train/margins_2": 1.3612507581710815, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -246.3128204345703, "logps_train/policy_1_l": -206.56930541992188, "logps_train/policy_1_w": -167.42080688476562, "logps_train/policy_2_2": -213.58499145507812, "logps_train/policy_2_w": -207.346923828125, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -253.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 2.7937164306640625, "rewards_train/1-l": -1.9381797313690186, "rewards_train/1-w": 4.332920074462891, "rewards_train/2-2": 3.907125473022461, "rewards_train/2-w": 2.8653085231781006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.271099805831909, "rewards_train/margins_1": 1.5392036437988281, "rewards_train/margins_2": 1.0418169498443604, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -72.48099517822266, "logps_train/policy_1_l": -79.22187805175781, "logps_train/policy_1_w": -78.62296295166016, "logps_train/policy_2_2": -56.86177444458008, "logps_train/policy_2_w": -103.420654296875, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -116.0, "rewards_train/1-2": 1.475435733795166, "rewards_train/1-l": -0.6647655963897705, "rewards_train/1-w": 2.531649112701416, "rewards_train/2-2": 1.9921916723251343, "rewards_train/2-w": 1.2729735374450684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1964147090911865, "rewards_train/margins_1": 1.05621337890625, "rewards_train/margins_2": 0.7192181348800659, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -174.33131408691406, "logps_train/policy_1_l": -280.2720031738281, "logps_train/policy_1_w": -145.09982299804688, "logps_train/policy_2_2": -133.69125366210938, "logps_train/policy_2_w": -195.614990234375, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -244.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.637181043624878, "rewards_train/1-l": -3.5916552543640137, "rewards_train/1-w": 3.6470484733581543, "rewards_train/2-2": 3.1636877059936523, "rewards_train/2-w": 2.0900626182556152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.238703727722168, "rewards_train/margins_1": 2.0098674297332764, "rewards_train/margins_2": 1.073625087738037, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -153.30665588378906, "logps_train/policy_1_l": -212.9504852294922, "logps_train/policy_1_w": -143.38662719726562, "logps_train/policy_2_2": -112.13877868652344, "logps_train/policy_2_w": -207.05221557617188, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": 1.7193341255187988, "rewards_train/1-l": -2.770047664642334, "rewards_train/1-w": 3.756258726119995, "rewards_train/2-2": 3.0165905952453613, "rewards_train/2-w": 1.3400901556015015, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.526306390762329, "rewards_train/margins_1": 2.0369246006011963, "rewards_train/margins_2": 1.6765004396438599, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -137.23623657226562, "logps_train/policy_1_l": -234.27928161621094, "logps_train/policy_1_w": -142.14492797851562, "logps_train/policy_2_2": -102.20672607421875, "logps_train/policy_2_w": -182.81710815429688, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.3045015335083008, "rewards_train/1-l": -2.951756715774536, "rewards_train/1-w": 3.1776952743530273, "rewards_train/2-2": 2.743389844894409, "rewards_train/2-w": 1.5479764938354492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1294519901275635, "rewards_train/margins_1": 1.8731937408447266, "rewards_train/margins_2": 1.19541335105896, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -150.95114135742188, "logps_train/policy_1_l": -176.79635620117188, "logps_train/policy_1_w": -111.05921936035156, "logps_train/policy_2_2": -127.31627655029297, "logps_train/policy_2_w": -140.58486938476562, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.9111363887786865, "rewards_train/1-l": -2.7989728450775146, "rewards_train/1-w": 3.2972025871276855, "rewards_train/2-2": 3.0647592544555664, "rewards_train/2-w": 2.1493260860443115, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.0961754322052, "rewards_train/margins_1": 1.386066198348999, "rewards_train/margins_2": 0.9154331684112549, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -253.20358276367188, "logps_train/policy_1_l": -201.8045654296875, "logps_train/policy_1_w": -201.4136962890625, "logps_train/policy_2_2": -211.00369262695312, "logps_train/policy_2_w": -247.39491271972656, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -250.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.1010303497314453, "rewards_train/1-l": -2.010699987411499, "rewards_train/1-w": 4.125036239624023, "rewards_train/2-2": 3.8801236152648926, "rewards_train/2-w": 2.3816022872924805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1357362270355225, "rewards_train/margins_1": 2.024005889892578, "rewards_train/margins_2": 1.498521327972412, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -240.39048767089844, "logps_train/policy_1_l": -228.5211944580078, "logps_train/policy_1_w": -190.16220092773438, "logps_train/policy_2_2": -201.62164306640625, "logps_train/policy_2_w": -226.78097534179688, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -228.0, "logps_train/ref_2_2": -242.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 2.362513303756714, "rewards_train/1-l": -2.0927443504333496, "rewards_train/1-w": 3.719719171524048, "rewards_train/2-2": 4.0815863609313965, "rewards_train/2-w": 2.150026321411133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8124635219573975, "rewards_train/margins_1": 1.357205867767334, "rewards_train/margins_2": 1.9315600395202637, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -116.22503662109375, "logps_train/policy_1_l": -143.13848876953125, "logps_train/policy_1_w": -74.28678894042969, "logps_train/policy_2_2": -91.41242980957031, "logps_train/policy_2_w": -114.20556640625, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.152496337890625, "rewards_train/1-l": -2.156818389892578, "rewards_train/1-w": 2.3947582244873047, "rewards_train/2-2": 2.247037887573242, "rewards_train/2-w": 0.8872554898262024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.551576614379883, "rewards_train/margins_1": 1.2422618865966797, "rewards_train/margins_2": 1.3597823977470398, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -130.6761016845703, "logps_train/policy_1_l": -224.77517700195312, "logps_train/policy_1_w": -63.474388122558594, "logps_train/policy_2_2": -103.27075958251953, "logps_train/policy_2_w": -79.61632537841797, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": 1.1909836530685425, "rewards_train/1-l": -4.2603302001953125, "rewards_train/1-w": 2.0842018127441406, "rewards_train/2-2": 2.6908926963806152, "rewards_train/2-w": 1.5532114505767822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.344532012939453, "rewards_train/margins_1": 0.8932181596755981, "rewards_train/margins_2": 1.137681245803833, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -258.70941162109375, "logps_train/policy_1_l": -177.78302001953125, "logps_train/policy_1_w": -185.34576416015625, "logps_train/policy_2_2": -209.9097442626953, "logps_train/policy_2_w": -231.797607421875, "logps_train/ref_1_2": -270.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -244.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.0884329080581665, "rewards_train/1-l": -1.882209300994873, "rewards_train/1-w": 3.9568288326263428, "rewards_train/2-2": 3.313713312149048, "rewards_train/2-w": 2.1764888763427734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.839038133621216, "rewards_train/margins_1": 2.8683959245681763, "rewards_train/margins_2": 1.1372244358062744, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -211.5924072265625, "logps_train/policy_1_l": -163.82301330566406, "logps_train/policy_1_w": -124.70159149169922, "logps_train/policy_2_2": -174.49807739257812, "logps_train/policy_2_w": -167.69358825683594, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.8720089197158813, "rewards_train/1-l": -1.7194589376449585, "rewards_train/1-w": 3.8329660892486572, "rewards_train/2-2": 3.7169909477233887, "rewards_train/2-w": 1.577515721321106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.552425026893616, "rewards_train/margins_1": 1.9609571695327759, "rewards_train/margins_2": 2.1394752264022827, "step": 651 }, { "epoch": 1.95, "learning_rate": 9.87539346195776e-09, "loss": 0.3593, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -123.4498519897461, "logps_train/policy_1_l": -152.71388244628906, "logps_train/policy_1_w": -115.5246810913086, "logps_train/policy_2_2": -98.46598052978516, "logps_train/policy_2_w": -161.25357055664062, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 1.393296241760254, "rewards_train/1-l": -1.8927747011184692, "rewards_train/1-w": 2.5264384746551514, "rewards_train/2-2": 2.0815272331237793, "rewards_train/2-w": 1.1215181350708008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.419213175773621, "rewards_train/margins_1": 1.1331422328948975, "rewards_train/margins_2": 0.9600090980529785, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -101.50264739990234, "logps_train/policy_1_l": -111.93724822998047, "logps_train/policy_1_w": -117.04566955566406, "logps_train/policy_2_2": -73.55390167236328, "logps_train/policy_2_w": -149.74087524414062, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -95.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.9575479030609131, "rewards_train/1-l": -1.6663322448730469, "rewards_train/1-w": 2.7290751934051514, "rewards_train/2-2": 2.193047046661377, "rewards_train/2-w": 1.3070168495178223, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.395407438278198, "rewards_train/margins_1": 1.7715272903442383, "rewards_train/margins_2": 0.8860301971435547, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -168.4513702392578, "logps_train/policy_1_l": -138.27772521972656, "logps_train/policy_1_w": -102.1611328125, "logps_train/policy_2_2": -111.25128173828125, "logps_train/policy_2_w": -157.58322143554688, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.5181445479393005, "rewards_train/1-l": -2.327185869216919, "rewards_train/1-w": 2.3368167877197266, "rewards_train/2-2": 2.5350279808044434, "rewards_train/2-w": 0.15574046969413757, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6640026569366455, "rewards_train/margins_1": 1.818672239780426, "rewards_train/margins_2": 2.379287511110306, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -126.04676818847656, "logps_train/policy_1_l": -110.15065002441406, "logps_train/policy_1_w": -104.74394226074219, "logps_train/policy_2_2": -89.60370635986328, "logps_train/policy_2_w": -144.00390625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.1132922172546387, "rewards_train/1-l": -1.212721347808838, "rewards_train/1-w": 3.041231155395508, "rewards_train/2-2": 2.3743948936462402, "rewards_train/2-w": 1.7746102809906006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.253952503204346, "rewards_train/margins_1": 1.9279389381408691, "rewards_train/margins_2": 0.5997846126556396, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -132.01065063476562, "logps_train/policy_1_l": -297.3391418457031, "logps_train/policy_1_w": -163.5238037109375, "logps_train/policy_2_2": -93.2081527709961, "logps_train/policy_2_w": -227.69033813476562, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -272.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": 1.514169692993164, "rewards_train/1-l": -2.5624287128448486, "rewards_train/1-w": 4.074182033538818, "rewards_train/2-2": 2.723325729370117, "rewards_train/2-w": 1.9590903520584106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.636610746383667, "rewards_train/margins_1": 2.5600123405456543, "rewards_train/margins_2": 0.7642353773117065, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -219.83535766601562, "logps_train/policy_1_l": -328.7407531738281, "logps_train/policy_1_w": -139.4492950439453, "logps_train/policy_2_2": -163.44992065429688, "logps_train/policy_2_w": -192.98519897460938, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -284.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 1.7008395195007324, "rewards_train/1-l": -4.541262626647949, "rewards_train/1-w": 3.9956953525543213, "rewards_train/2-2": 4.014383792877197, "rewards_train/2-w": 2.178042411804199, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.53695797920227, "rewards_train/margins_1": 2.294855833053589, "rewards_train/margins_2": 1.836341381072998, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -187.5511932373047, "logps_train/policy_1_l": -256.2229919433594, "logps_train/policy_1_w": -170.70274353027344, "logps_train/policy_2_2": -157.140869140625, "logps_train/policy_2_w": -215.31314086914062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": 2.644880771636963, "rewards_train/1-l": -2.4060888290405273, "rewards_train/1-w": 4.093006610870361, "rewards_train/2-2": 3.6585705280303955, "rewards_train/2-w": 2.124937057495117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.499095439910889, "rewards_train/margins_1": 1.4481258392333984, "rewards_train/margins_2": 1.5336334705352783, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -84.69688415527344, "logps_train/policy_1_l": -110.0471420288086, "logps_train/policy_1_w": -89.30867767333984, "logps_train/policy_2_2": -60.865447998046875, "logps_train/policy_2_w": -112.78553009033203, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 1.5506236553192139, "rewards_train/1-l": -1.7699484825134277, "rewards_train/1-w": 2.5089762210845947, "rewards_train/2-2": 2.329861640930176, "rewards_train/2-w": 1.4464473724365234, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.2789247035980225, "rewards_train/margins_1": 0.9583525657653809, "rewards_train/margins_2": 0.8834142684936523, "step": 652 }, { "epoch": 1.96, "logps_train/policy_1_2": -253.5225067138672, "logps_train/policy_1_l": -401.89849853515625, "logps_train/policy_1_w": -198.6746368408203, "logps_train/policy_2_2": -180.07479858398438, "logps_train/policy_2_w": -257.9519348144531, "logps_train/ref_1_2": -274.0, "logps_train/ref_1_l": -364.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": 1.9852495193481445, "rewards_train/1-l": -3.689850330352783, "rewards_train/1-w": 4.382535934448242, "rewards_train/2-2": 3.9987711906433105, "rewards_train/2-w": 1.9923052787780762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 8.072386264801025, "rewards_train/margins_1": 2.3972864151000977, "rewards_train/margins_2": 2.0064659118652344, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -173.94989013671875, "logps_train/policy_1_l": -347.41485595703125, "logps_train/policy_1_w": -151.32127380371094, "logps_train/policy_2_2": -132.74581909179688, "logps_train/policy_2_w": -212.23573303222656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -304.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": 1.6585506200790405, "rewards_train/1-l": -4.200442790985107, "rewards_train/1-w": 4.10224723815918, "rewards_train/2-2": 3.246511936187744, "rewards_train/2-w": 1.4076759815216064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.302690029144287, "rewards_train/margins_1": 2.443696618080139, "rewards_train/margins_2": 1.8388359546661377, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -192.07481384277344, "logps_train/policy_1_l": -228.752197265625, "logps_train/policy_1_w": -103.82122802734375, "logps_train/policy_2_2": -154.22328186035156, "logps_train/policy_2_w": -144.85165405273438, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 2.0581445693969727, "rewards_train/1-l": -2.6322505474090576, "rewards_train/1-w": 3.003815174102783, "rewards_train/2-2": 3.4917349815368652, "rewards_train/2-w": 1.276944875717163, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.636065721511841, "rewards_train/margins_1": 0.9456706047058105, "rewards_train/margins_2": 2.214790105819702, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -223.75086975097656, "logps_train/policy_1_l": -171.33868408203125, "logps_train/policy_1_w": -136.51705932617188, "logps_train/policy_2_2": -161.91140747070312, "logps_train/policy_2_w": -196.8888702392578, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": 1.4405378103256226, "rewards_train/1-l": -1.1588698625564575, "rewards_train/1-w": 3.6014199256896973, "rewards_train/2-2": 3.811983346939087, "rewards_train/2-w": 1.7173633575439453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.760289788246155, "rewards_train/margins_1": 2.1608821153640747, "rewards_train/margins_2": 2.0946199893951416, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -117.9852066040039, "logps_train/policy_1_l": -102.58944702148438, "logps_train/policy_1_w": -66.3484115600586, "logps_train/policy_2_2": -87.0478286743164, "logps_train/policy_2_w": -92.47743225097656, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -106.0, "rewards_train/1-2": 0.8608548045158386, "rewards_train/1-l": -1.5757415294647217, "rewards_train/1-w": 2.068284273147583, "rewards_train/2-2": 2.104982852935791, "rewards_train/2-w": 1.350304126739502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.6440258026123047, "rewards_train/margins_1": 1.2074294686317444, "rewards_train/margins_2": 0.7546787261962891, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -172.48587036132812, "logps_train/policy_1_l": -197.099853515625, "logps_train/policy_1_w": -93.20895385742188, "logps_train/policy_2_2": -135.07058715820312, "logps_train/policy_2_w": -124.27485656738281, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 1.3795373439788818, "rewards_train/1-l": -2.638988971710205, "rewards_train/1-w": 3.16894793510437, "rewards_train/2-2": 3.0398168563842773, "rewards_train/2-w": 1.9326709508895874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.807936906814575, "rewards_train/margins_1": 1.7894105911254883, "rewards_train/margins_2": 1.10714590549469, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -214.58233642578125, "logps_train/policy_1_l": -191.22076416015625, "logps_train/policy_1_w": -147.067138671875, "logps_train/policy_2_2": -162.11663818359375, "logps_train/policy_2_w": -194.9951934814453, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 1.4636411666870117, "rewards_train/1-l": -2.12910795211792, "rewards_train/1-w": 3.3456294536590576, "rewards_train/2-2": 3.2090396881103516, "rewards_train/2-w": 1.5504796504974365, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.4747374057769775, "rewards_train/margins_1": 1.881988286972046, "rewards_train/margins_2": 1.658560037612915, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -190.54432678222656, "logps_train/policy_1_l": -180.17457580566406, "logps_train/policy_1_w": -169.55197143554688, "logps_train/policy_2_2": -151.9389190673828, "logps_train/policy_2_w": -220.79397583007812, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": 1.6393182277679443, "rewards_train/1-l": -1.9018323421478271, "rewards_train/1-w": 3.177615165710449, "rewards_train/2-2": 3.3670454025268555, "rewards_train/2-w": 1.3666958808898926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.079447507858276, "rewards_train/margins_1": 1.5382969379425049, "rewards_train/margins_2": 2.000349521636963, "step": 653 }, { "epoch": 1.96, "learning_rate": 7.803859074854425e-09, "loss": 0.3442, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -123.7107162475586, "logps_train/policy_1_l": -117.6437759399414, "logps_train/policy_1_w": -72.08635711669922, "logps_train/policy_2_2": -89.73284912109375, "logps_train/policy_2_w": -111.37439727783203, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": 0.7312722206115723, "rewards_train/1-l": -1.7788305282592773, "rewards_train/1-w": 2.624176502227783, "rewards_train/2-2": 2.34702730178833, "rewards_train/2-w": 1.6234979629516602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.4030070304870605, "rewards_train/margins_1": 1.892904281616211, "rewards_train/margins_2": 0.7235293388366699, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -140.10794067382812, "logps_train/policy_1_l": -118.77214050292969, "logps_train/policy_1_w": -137.52435302734375, "logps_train/policy_2_2": -108.40530395507812, "logps_train/policy_2_w": -176.46969604492188, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 2.1142067909240723, "rewards_train/1-l": -2.0881519317626953, "rewards_train/1-w": 3.492877721786499, "rewards_train/2-2": 3.3602511882781982, "rewards_train/2-w": 2.0061545372009277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.581029653549194, "rewards_train/margins_1": 1.3786709308624268, "rewards_train/margins_2": 1.3540966510772705, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -183.30702209472656, "logps_train/policy_1_l": -127.17764282226562, "logps_train/policy_1_w": -139.99807739257812, "logps_train/policy_2_2": -146.03712463378906, "logps_train/policy_2_w": -188.00006103515625, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": 1.5329689979553223, "rewards_train/1-l": -1.1451079845428467, "rewards_train/1-w": 3.2572221755981445, "rewards_train/2-2": 2.8021464347839355, "rewards_train/2-w": 1.5247983932495117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.402330160140991, "rewards_train/margins_1": 1.7242531776428223, "rewards_train/margins_2": 1.2773480415344238, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -117.12361145019531, "logps_train/policy_1_l": -140.2650146484375, "logps_train/policy_1_w": -94.0533447265625, "logps_train/policy_2_2": -91.19828033447266, "logps_train/policy_2_w": -125.15812683105469, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.6126387119293213, "rewards_train/1-l": -1.1625597476959229, "rewards_train/1-w": 2.6509156227111816, "rewards_train/2-2": 2.583296775817871, "rewards_train/2-w": 1.5935616493225098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8134753704071045, "rewards_train/margins_1": 1.0382769107818604, "rewards_train/margins_2": 0.9897351264953613, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -209.20504760742188, "logps_train/policy_1_l": -167.8021240234375, "logps_train/policy_1_w": -168.3966064453125, "logps_train/policy_2_2": -161.8251953125, "logps_train/policy_2_w": -222.9813232421875, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -249.0, "rewards_train/1-2": 2.5943403244018555, "rewards_train/1-l": -1.5384154319763184, "rewards_train/1-w": 4.888465404510498, "rewards_train/2-2": 4.39560604095459, "rewards_train/2-w": 2.576866626739502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.426880836486816, "rewards_train/margins_1": 2.2941250801086426, "rewards_train/margins_2": 1.818739414215088, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -118.88886260986328, "logps_train/policy_1_l": -145.7594757080078, "logps_train/policy_1_w": -89.1125259399414, "logps_train/policy_2_2": -96.56343078613281, "logps_train/policy_2_w": -112.078125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 1.502129077911377, "rewards_train/1-l": -1.8218457698822021, "rewards_train/1-w": 2.319215774536133, "rewards_train/2-2": 2.3502979278564453, "rewards_train/2-w": 1.4281256198883057, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.141061544418335, "rewards_train/margins_1": 0.8170866966247559, "rewards_train/margins_2": 0.9221723079681396, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -155.75106811523438, "logps_train/policy_1_l": -154.358642578125, "logps_train/policy_1_w": -125.24710083007812, "logps_train/policy_2_2": -97.83263397216797, "logps_train/policy_2_w": -184.7645263671875, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": 0.4780195951461792, "rewards_train/1-l": -1.868286371231079, "rewards_train/1-w": 3.200289487838745, "rewards_train/2-2": 2.7870492935180664, "rewards_train/2-w": 0.8360480070114136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.068575859069824, "rewards_train/margins_1": 2.722269892692566, "rewards_train/margins_2": 1.9510012865066528, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -116.60739135742188, "logps_train/policy_1_l": -143.7169189453125, "logps_train/policy_1_w": -80.80668640136719, "logps_train/policy_2_2": -93.87767028808594, "logps_train/policy_2_w": -103.78971862792969, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -102.5, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": 1.685354471206665, "rewards_train/1-l": -2.460754871368408, "rewards_train/1-w": 2.2005813121795654, "rewards_train/2-2": 2.5114521980285645, "rewards_train/2-w": 1.2862627506256104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.661336183547974, "rewards_train/margins_1": 0.5152268409729004, "rewards_train/margins_2": 1.225189447402954, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -110.58635711669922, "logps_train/policy_1_l": -138.44107055664062, "logps_train/policy_1_w": -65.54022979736328, "logps_train/policy_2_2": -88.15316772460938, "logps_train/policy_2_w": -87.02567291259766, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.4101150035858154, "rewards_train/1-l": -1.7784343957901, "rewards_train/1-w": 2.298320770263672, "rewards_train/2-2": 2.4846832752227783, "rewards_train/2-w": 1.347433090209961, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.076755166053772, "rewards_train/margins_1": 0.8882057666778564, "rewards_train/margins_2": 1.1372501850128174, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -218.40536499023438, "logps_train/policy_1_l": -228.28395080566406, "logps_train/policy_1_w": -138.77359008789062, "logps_train/policy_2_2": -174.5290985107422, "logps_train/policy_2_w": -180.83547973632812, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.6657142639160156, "rewards_train/1-l": -2.6332788467407227, "rewards_train/1-w": 3.47420334815979, "rewards_train/2-2": 3.7189652919769287, "rewards_train/2-w": 1.7648885250091553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.107482194900513, "rewards_train/margins_1": 1.8084890842437744, "rewards_train/margins_2": 1.9540767669677734, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -141.8921356201172, "logps_train/policy_1_l": -128.28128051757812, "logps_train/policy_1_w": -120.705322265625, "logps_train/policy_2_2": -115.19132995605469, "logps_train/policy_2_w": -153.46426391601562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.7967238426208496, "rewards_train/1-l": -1.9601589441299438, "rewards_train/1-w": 3.27536678314209, "rewards_train/2-2": 3.0753982067108154, "rewards_train/2-w": 1.5900976657867432, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.235525727272034, "rewards_train/margins_1": 1.4786429405212402, "rewards_train/margins_2": 1.4853005409240723, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -193.26376342773438, "logps_train/policy_1_l": -174.30718994140625, "logps_train/policy_1_w": -112.71770477294922, "logps_train/policy_2_2": -143.51028442382812, "logps_train/policy_2_w": -161.92286682128906, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.0849521160125732, "rewards_train/1-l": -2.4604060649871826, "rewards_train/1-w": 3.0969791412353516, "rewards_train/2-2": 3.400925874710083, "rewards_train/2-w": 1.4249008893966675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.557385206222534, "rewards_train/margins_1": 2.0120270252227783, "rewards_train/margins_2": 1.9760249853134155, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -126.33822631835938, "logps_train/policy_1_l": -148.20921325683594, "logps_train/policy_1_w": -117.74285888671875, "logps_train/policy_2_2": -94.193359375, "logps_train/policy_2_w": -163.30984497070312, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.9830231666564941, "rewards_train/1-l": -2.105638027191162, "rewards_train/1-w": 3.1788392066955566, "rewards_train/2-2": 3.3954339027404785, "rewards_train/2-w": 1.6158905029296875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.284477233886719, "rewards_train/margins_1": 1.1958160400390625, "rewards_train/margins_2": 1.779543399810791, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -81.46260833740234, "logps_train/policy_1_l": -147.73992919921875, "logps_train/policy_1_w": -76.7311019897461, "logps_train/policy_2_2": -54.39576721191406, "logps_train/policy_2_w": -109.31326293945312, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 1.0931923389434814, "rewards_train/1-l": -2.3207221031188965, "rewards_train/1-w": 1.9557963609695435, "rewards_train/2-2": 2.137376546859741, "rewards_train/2-w": 1.0706264972686768, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.27651846408844, "rewards_train/margins_1": 0.862604022026062, "rewards_train/margins_2": 1.0667500495910645, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -70.67354583740234, "logps_train/policy_1_l": -77.50184631347656, "logps_train/policy_1_w": -72.50215148925781, "logps_train/policy_2_2": -51.281768798828125, "logps_train/policy_2_w": -98.23057556152344, "logps_train/ref_1_2": -78.5, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -68.0, "logps_train/ref_2_w": -108.0, "rewards_train/1-2": 0.7834266424179077, "rewards_train/1-l": -1.011366844177246, "rewards_train/1-w": 2.3673629760742188, "rewards_train/2-2": 1.6802215576171875, "rewards_train/2-w": 0.995692789554596, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.378729820251465, "rewards_train/margins_1": 1.583936333656311, "rewards_train/margins_2": 0.6845287680625916, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -175.0543212890625, "logps_train/policy_1_l": -161.42628479003906, "logps_train/policy_1_w": -100.64856719970703, "logps_train/policy_2_2": -125.55351257324219, "logps_train/policy_2_w": -145.91363525390625, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.0695680379867554, "rewards_train/1-l": -2.0520036220550537, "rewards_train/1-w": 3.6257681846618652, "rewards_train/2-2": 3.175898313522339, "rewards_train/2-w": 2.233637571334839, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.677771806716919, "rewards_train/margins_1": 2.55620014667511, "rewards_train/margins_2": 0.9422607421875, "step": 655 }, { "epoch": 1.96, "learning_rate": 5.975558637634216e-09, "loss": 0.3831, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -124.56289672851562, "logps_train/policy_1_l": -109.8896484375, "logps_train/policy_1_w": -97.69119262695312, "logps_train/policy_2_2": -96.04930114746094, "logps_train/policy_2_w": -130.45603942871094, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": 2.113241672515869, "rewards_train/1-l": -1.2014648914337158, "rewards_train/1-w": 2.8217499256134033, "rewards_train/2-2": 2.7954607009887695, "rewards_train/2-w": 1.3156752586364746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.023214817047119, "rewards_train/margins_1": 0.7085082530975342, "rewards_train/margins_2": 1.479785442352295, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -198.60533142089844, "logps_train/policy_1_l": -202.48094177246094, "logps_train/policy_1_w": -191.6261444091797, "logps_train/policy_2_2": -164.34254455566406, "logps_train/policy_2_w": -227.0760955810547, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -233.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": 2.1425914764404297, "rewards_train/1-l": -2.502002239227295, "rewards_train/1-w": 4.112384796142578, "rewards_train/2-2": 3.8032455444335938, "rewards_train/2-w": 2.6048901081085205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.614387035369873, "rewards_train/margins_1": 1.9697933197021484, "rewards_train/margins_2": 1.1983554363250732, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -174.26629638671875, "logps_train/policy_1_l": -140.64280700683594, "logps_train/policy_1_w": -116.93040466308594, "logps_train/policy_2_2": -127.37317657470703, "logps_train/policy_2_w": -160.66490173339844, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.3339176177978516, "rewards_train/1-l": -1.293967843055725, "rewards_train/1-w": 3.4350850582122803, "rewards_train/2-2": 2.989635705947876, "rewards_train/2-w": 1.7522602081298828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.729052901268005, "rewards_train/margins_1": 2.1011674404144287, "rewards_train/margins_2": 1.2373754978179932, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -250.23635864257812, "logps_train/policy_1_l": -221.52293395996094, "logps_train/policy_1_w": -185.3297576904297, "logps_train/policy_2_2": -215.72213745117188, "logps_train/policy_2_w": -232.3068084716797, "logps_train/ref_1_2": -280.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -260.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 3.0951130390167236, "rewards_train/1-l": -1.8397934436798096, "rewards_train/1-w": 3.529524803161621, "rewards_train/2-2": 4.571537017822266, "rewards_train/2-w": 2.0943188667297363, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.369318246841431, "rewards_train/margins_1": 0.43441176414489746, "rewards_train/margins_2": 2.4772181510925293, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -116.47341918945312, "logps_train/policy_1_l": -139.1506805419922, "logps_train/policy_1_w": -79.46765899658203, "logps_train/policy_2_2": -83.91169738769531, "logps_train/policy_2_w": -114.13777160644531, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.9964077472686768, "rewards_train/1-l": -2.6040332317352295, "rewards_train/1-w": 3.271984100341797, "rewards_train/2-2": 2.857854127883911, "rewards_train/2-w": 1.979973316192627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.876017332077026, "rewards_train/margins_1": 1.2755763530731201, "rewards_train/margins_2": 0.8778808116912842, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -131.88052368164062, "logps_train/policy_1_l": -143.30348205566406, "logps_train/policy_1_w": -56.6961784362793, "logps_train/policy_2_2": -99.73692321777344, "logps_train/policy_2_w": -82.53302764892578, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -74.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -93.5, "rewards_train/1-2": 1.10921311378479, "rewards_train/1-l": -2.0319104194641113, "rewards_train/1-w": 1.7650506496429443, "rewards_train/2-2": 2.6099014282226562, "rewards_train/2-w": 1.1142754554748535, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7969610691070557, "rewards_train/margins_1": 0.6558375358581543, "rewards_train/margins_2": 1.4956259727478027, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -102.14258575439453, "logps_train/policy_1_l": -130.63473510742188, "logps_train/policy_1_w": -77.46455383300781, "logps_train/policy_2_2": -75.40483093261719, "logps_train/policy_2_w": -100.60209655761719, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -103.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 1.6873035430908203, "rewards_train/1-l": -2.555173397064209, "rewards_train/1-w": 2.6691696643829346, "rewards_train/2-2": 2.7821736335754395, "rewards_train/2-w": 1.4929147958755493, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.2243430614471436, "rewards_train/margins_1": 0.9818661212921143, "rewards_train/margins_2": 1.2892588376998901, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -142.88853454589844, "logps_train/policy_1_l": -160.4945068359375, "logps_train/policy_1_w": -121.82646179199219, "logps_train/policy_2_2": -126.61219787597656, "logps_train/policy_2_w": -149.48565673828125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 2.2283339500427246, "rewards_train/1-l": -2.2307000160217285, "rewards_train/1-w": 3.558760643005371, "rewards_train/2-2": 2.694248676300049, "rewards_train/2-w": 2.3592474460601807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.7894606590271, "rewards_train/margins_1": 1.3304266929626465, "rewards_train/margins_2": 0.33500123023986816, "step": 656 }, { "epoch": 1.97, "logps_train/policy_1_2": -74.37080383300781, "logps_train/policy_1_l": -114.00787353515625, "logps_train/policy_1_w": -129.44180297851562, "logps_train/policy_2_2": -57.05866622924805, "logps_train/policy_2_w": -176.36538696289062, "logps_train/ref_1_2": -90.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.5890913009643555, "rewards_train/1-l": -1.6368228197097778, "rewards_train/1-w": 3.3987882137298584, "rewards_train/2-2": 2.014836311340332, "rewards_train/2-w": 1.332992672920227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.035611033439636, "rewards_train/margins_1": 1.809696912765503, "rewards_train/margins_2": 0.681843638420105, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -117.39784240722656, "logps_train/policy_1_l": -80.87391662597656, "logps_train/policy_1_w": -111.24824523925781, "logps_train/policy_2_2": -95.69810485839844, "logps_train/policy_2_w": -137.62176513671875, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.756309986114502, "rewards_train/1-l": -1.0673959255218506, "rewards_train/1-w": 2.108769416809082, "rewards_train/2-2": 1.6672992706298828, "rewards_train/2-w": 0.7741524577140808, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1761653423309326, "rewards_train/margins_1": 1.35245943069458, "rewards_train/margins_2": 0.893146812915802, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -173.4757843017578, "logps_train/policy_1_l": -190.11546325683594, "logps_train/policy_1_w": -91.00982666015625, "logps_train/policy_2_2": -142.29981994628906, "logps_train/policy_2_w": -124.39360046386719, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": 1.9930462837219238, "rewards_train/1-l": -1.470139741897583, "rewards_train/1-w": 3.059173107147217, "rewards_train/2-2": 3.1825180053710938, "rewards_train/2-w": 2.2793898582458496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5293128490448, "rewards_train/margins_1": 1.066126823425293, "rewards_train/margins_2": 0.9031281471252441, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -215.733154296875, "logps_train/policy_1_l": -250.39163208007812, "logps_train/policy_1_w": -237.69842529296875, "logps_train/policy_2_2": -174.59608459472656, "logps_train/policy_2_w": -292.1088562011719, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -278.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -312.0, "rewards_train/1-2": 2.222388744354248, "rewards_train/1-l": -2.1116247177124023, "rewards_train/1-w": 4.032696723937988, "rewards_train/2-2": 3.5849227905273438, "rewards_train/2-w": 1.9826688766479492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.144321441650391, "rewards_train/margins_1": 1.8103079795837402, "rewards_train/margins_2": 1.6022539138793945, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -167.71563720703125, "logps_train/policy_1_l": -172.95004272460938, "logps_train/policy_1_w": -57.23961639404297, "logps_train/policy_2_2": -124.72307586669922, "logps_train/policy_2_w": -72.2676010131836, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": 1.2987494468688965, "rewards_train/1-l": -2.3227391242980957, "rewards_train/1-w": 2.3498668670654297, "rewards_train/2-2": 2.926910638809204, "rewards_train/2-w": 1.727146029472351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.672605991363525, "rewards_train/margins_1": 1.0511174201965332, "rewards_train/margins_2": 1.199764609336853, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -148.7535858154297, "logps_train/policy_1_l": -179.839111328125, "logps_train/policy_1_w": -127.0979995727539, "logps_train/policy_2_2": -111.64750671386719, "logps_train/policy_2_w": -169.9429473876953, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 1.4613595008850098, "rewards_train/1-l": -1.883130431175232, "rewards_train/1-w": 3.551137685775757, "rewards_train/2-2": 2.6748976707458496, "rewards_train/2-w": 1.6541426181793213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.434268116950989, "rewards_train/margins_1": 2.089778184890747, "rewards_train/margins_2": 1.0207550525665283, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -215.16915893554688, "logps_train/policy_1_l": -220.32345581054688, "logps_train/policy_1_w": -124.67161560058594, "logps_train/policy_2_2": -177.8213348388672, "logps_train/policy_2_w": -161.68936157226562, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 2.371365547180176, "rewards_train/1-l": -2.16359543800354, "rewards_train/1-w": 3.851588010787964, "rewards_train/2-2": 3.498335361480713, "rewards_train/2-w": 2.8560643196105957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.015183448791504, "rewards_train/margins_1": 1.480222463607788, "rewards_train/margins_2": 0.6422710418701172, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -81.33328247070312, "logps_train/policy_1_l": -138.96749877929688, "logps_train/policy_1_w": -62.64504623413086, "logps_train/policy_2_2": -66.11968231201172, "logps_train/policy_2_w": -90.67780303955078, "logps_train/ref_1_2": -92.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -82.5, "logps_train/ref_2_w": -100.5, "rewards_train/1-2": 1.0608617067337036, "rewards_train/1-l": -1.868747591972351, "rewards_train/1-w": 2.113718271255493, "rewards_train/2-2": 1.6198680400848389, "rewards_train/2-w": 0.9781176447868347, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9824658632278442, "rewards_train/margins_1": 1.0528565645217896, "rewards_train/margins_2": 0.6417503952980042, "step": 657 }, { "epoch": 1.97, "learning_rate": 4.390670589196622e-09, "loss": 0.3764, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -121.4733657836914, "logps_train/policy_1_l": -131.94244384765625, "logps_train/policy_1_w": -104.70176696777344, "logps_train/policy_2_2": -87.23686218261719, "logps_train/policy_2_w": -139.8557891845703, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 1.329225778579712, "rewards_train/1-l": -1.8670973777770996, "rewards_train/1-w": 2.5587289333343506, "rewards_train/2-2": 2.604438304901123, "rewards_train/2-w": 1.5331714153289795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.42582631111145, "rewards_train/margins_1": 1.2295031547546387, "rewards_train/margins_2": 1.0712668895721436, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -184.00241088867188, "logps_train/policy_1_l": -147.33047485351562, "logps_train/policy_1_w": -139.408935546875, "logps_train/policy_2_2": -147.28121948242188, "logps_train/policy_2_w": -166.0511474609375, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 2.015385150909424, "rewards_train/1-l": -1.4303139448165894, "rewards_train/1-w": 3.4692635536193848, "rewards_train/2-2": 3.3023478984832764, "rewards_train/2-w": 2.1214475631713867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.899577498435974, "rewards_train/margins_1": 1.453878402709961, "rewards_train/margins_2": 1.1809003353118896, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -129.2005615234375, "logps_train/policy_1_l": -140.6847381591797, "logps_train/policy_1_w": -96.40614318847656, "logps_train/policy_2_2": -104.60696411132812, "logps_train/policy_2_w": -122.11553955078125, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 1.4002563953399658, "rewards_train/1-l": -1.2996755838394165, "rewards_train/1-w": 3.1207141876220703, "rewards_train/2-2": 2.0643036365509033, "rewards_train/2-w": 1.8568060398101807, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 4.420389771461487, "rewards_train/margins_1": 1.7204577922821045, "rewards_train/margins_2": 0.20749759674072266, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -135.85182189941406, "logps_train/policy_1_l": -192.4790496826172, "logps_train/policy_1_w": -90.65601348876953, "logps_train/policy_2_2": -100.33580017089844, "logps_train/policy_2_w": -123.34215545654297, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.2132550477981567, "rewards_train/1-l": -2.2708547115325928, "rewards_train/1-w": 2.451977252960205, "rewards_train/2-2": 2.4847798347473145, "rewards_train/2-w": 1.1243782043457031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.722831964492798, "rewards_train/margins_1": 1.2387222051620483, "rewards_train/margins_2": 1.3604016304016113, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -95.31468200683594, "logps_train/policy_1_l": -91.00553131103516, "logps_train/policy_1_w": -73.85034942626953, "logps_train/policy_2_2": -70.540771484375, "logps_train/policy_2_w": -107.32765197753906, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 1.3522229194641113, "rewards_train/1-l": -1.0697426795959473, "rewards_train/1-w": 2.655590057373047, "rewards_train/2-2": 2.0961179733276367, "rewards_train/2-w": 1.4891096353530884, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.725332736968994, "rewards_train/margins_1": 1.3033671379089355, "rewards_train/margins_2": 0.6070083379745483, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -187.09376525878906, "logps_train/policy_1_l": -193.64675903320312, "logps_train/policy_1_w": -122.90733337402344, "logps_train/policy_2_2": -145.33563232421875, "logps_train/policy_2_w": -158.6551971435547, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 1.5624985694885254, "rewards_train/1-l": -2.302565097808838, "rewards_train/1-w": 3.763368606567383, "rewards_train/2-2": 3.386749505996704, "rewards_train/2-w": 2.5965895652770996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.065933704376221, "rewards_train/margins_1": 2.2008700370788574, "rewards_train/margins_2": 0.7901599407196045, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -123.59452056884766, "logps_train/policy_1_l": -156.284423828125, "logps_train/policy_1_w": -109.07275390625, "logps_train/policy_2_2": -91.40926361083984, "logps_train/policy_2_w": -145.08473205566406, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.5655475854873657, "rewards_train/1-l": -2.8880133628845215, "rewards_train/1-w": 2.732177734375, "rewards_train/2-2": 3.0239174365997314, "rewards_train/2-w": 1.7602766752243042, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.6201910972595215, "rewards_train/margins_1": 1.1666301488876343, "rewards_train/margins_2": 1.2636407613754272, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -59.834388732910156, "logps_train/policy_1_l": -91.4522705078125, "logps_train/policy_1_w": -58.04425811767578, "logps_train/policy_2_2": -45.188194274902344, "logps_train/policy_2_w": -68.49447631835938, "logps_train/ref_1_2": -72.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -61.75, "logps_train/ref_2_w": -85.0, "rewards_train/1-2": 1.1954677104949951, "rewards_train/1-l": -2.1465935707092285, "rewards_train/1-w": 2.354168176651001, "rewards_train/2-2": 1.648758888244629, "rewards_train/2-w": 1.6630527973175049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 4.5007617473602295, "rewards_train/margins_1": 1.1587004661560059, "rewards_train/margins_2": -0.014293909072875977, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -98.7253189086914, "logps_train/policy_1_l": -154.83802795410156, "logps_train/policy_1_w": -95.11221313476562, "logps_train/policy_2_2": -70.2440414428711, "logps_train/policy_2_w": -130.74349975585938, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.5290303230285645, "rewards_train/1-l": -2.2838034629821777, "rewards_train/1-w": 3.3204188346862793, "rewards_train/2-2": 2.6974706649780273, "rewards_train/2-w": 1.5248682498931885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.604222297668457, "rewards_train/margins_1": 1.7913885116577148, "rewards_train/margins_2": 1.1726024150848389, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -212.5450439453125, "logps_train/policy_1_l": -192.74798583984375, "logps_train/policy_1_w": -135.4822540283203, "logps_train/policy_2_2": -167.6692657470703, "logps_train/policy_2_w": -175.10569763183594, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 1.9298697710037231, "rewards_train/1-l": -1.9974546432495117, "rewards_train/1-w": 3.771306276321411, "rewards_train/2-2": 4.033073902130127, "rewards_train/2-w": 2.4081804752349854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.768760919570923, "rewards_train/margins_1": 1.841436505317688, "rewards_train/margins_2": 1.6248934268951416, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -176.75900268554688, "logps_train/policy_1_l": -182.3489990234375, "logps_train/policy_1_w": -108.07451629638672, "logps_train/policy_2_2": -131.84982299804688, "logps_train/policy_2_w": -154.1400604248047, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.1315219402313232, "rewards_train/1-l": -1.5858763456344604, "rewards_train/1-w": 2.9120798110961914, "rewards_train/2-2": 2.9611120223999023, "rewards_train/2-w": 1.3703694343566895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.497956156730652, "rewards_train/margins_1": 1.7805578708648682, "rewards_train/margins_2": 1.590742588043213, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -142.38217163085938, "logps_train/policy_1_l": -72.11245727539062, "logps_train/policy_1_w": -69.20064544677734, "logps_train/policy_2_2": -106.8803939819336, "logps_train/policy_2_w": -106.69853210449219, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -58.25, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": 1.02115797996521, "rewards_train/1-l": -1.3758947849273682, "rewards_train/1-w": 2.455716609954834, "rewards_train/2-2": 2.6744608879089355, "rewards_train/2-w": 0.772138774394989, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.831611394882202, "rewards_train/margins_1": 1.434558629989624, "rewards_train/margins_2": 1.9023221135139465, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -155.95655822753906, "logps_train/policy_1_l": -199.41879272460938, "logps_train/policy_1_w": -129.48138427734375, "logps_train/policy_2_2": -117.51953125, "logps_train/policy_2_w": -176.34677124023438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 1.7587382793426514, "rewards_train/1-l": -2.494126081466675, "rewards_train/1-w": 3.343268394470215, "rewards_train/2-2": 3.186328887939453, "rewards_train/2-w": 1.3637596368789673, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.83739447593689, "rewards_train/margins_1": 1.5845301151275635, "rewards_train/margins_2": 1.8225692510604858, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -125.99629974365234, "logps_train/policy_1_l": -162.11288452148438, "logps_train/policy_1_w": -159.1826171875, "logps_train/policy_2_2": -96.142578125, "logps_train/policy_2_w": -203.46475219726562, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.7765421867370605, "rewards_train/1-l": -1.556601643562317, "rewards_train/1-w": 3.920508861541748, "rewards_train/2-2": 2.8009767532348633, "rewards_train/2-w": 1.857431411743164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.477110505104065, "rewards_train/margins_1": 2.1439666748046875, "rewards_train/margins_2": 0.9435453414916992, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -127.59454345703125, "logps_train/policy_1_l": -178.96714782714844, "logps_train/policy_1_w": -55.98719787597656, "logps_train/policy_2_2": -91.82933044433594, "logps_train/policy_2_w": -92.71076965332031, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -79.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": 0.7717950940132141, "rewards_train/1-l": -2.745298385620117, "rewards_train/1-w": 2.2680768966674805, "rewards_train/2-2": 2.253786563873291, "rewards_train/2-w": 1.1406420469284058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.013375282287598, "rewards_train/margins_1": 1.4962818026542664, "rewards_train/margins_2": 1.1131445169448853, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -127.73664855957031, "logps_train/policy_1_l": -142.45989990234375, "logps_train/policy_1_w": -77.85212707519531, "logps_train/policy_2_2": -88.74623107910156, "logps_train/policy_2_w": -114.02778625488281, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.9048508405685425, "rewards_train/1-l": -2.7338809967041016, "rewards_train/1-w": 2.611271381378174, "rewards_train/2-2": 2.4373879432678223, "rewards_train/2-w": 1.1347216367721558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.345152378082275, "rewards_train/margins_1": 1.7064205408096313, "rewards_train/margins_2": 1.3026663064956665, "step": 659 }, { "epoch": 1.98, "learning_rate": 3.049349611820851e-09, "loss": 0.3955, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -137.5743408203125, "logps_train/policy_1_l": -119.7723388671875, "logps_train/policy_1_w": -59.92631149291992, "logps_train/policy_2_2": -100.51887512207031, "logps_train/policy_2_w": -96.06350708007812, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -103.0, "rewards_train/1-2": 0.7956902980804443, "rewards_train/1-l": -2.001843214035034, "rewards_train/1-w": 2.046236276626587, "rewards_train/2-2": 2.2949867248535156, "rewards_train/2-w": 0.6959928870201111, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.048079490661621, "rewards_train/margins_1": 1.2505459785461426, "rewards_train/margins_2": 1.5989938378334045, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -107.6343994140625, "logps_train/policy_1_l": -184.47019958496094, "logps_train/policy_1_w": -93.69436645507812, "logps_train/policy_2_2": -79.6421127319336, "logps_train/policy_2_w": -134.0317840576172, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.5474977493286133, "rewards_train/1-l": -2.918309211730957, "rewards_train/1-w": 3.117281436920166, "rewards_train/2-2": 2.506882667541504, "rewards_train/2-w": 1.657759189605713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 6.035590648651123, "rewards_train/margins_1": 1.5697836875915527, "rewards_train/margins_2": 0.849123477935791, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -110.5118179321289, "logps_train/policy_1_l": -171.43821716308594, "logps_train/policy_1_w": -130.84312438964844, "logps_train/policy_2_2": -84.95573425292969, "logps_train/policy_2_w": -164.01336669921875, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.067837119102478, "rewards_train/1-l": -1.8094466924667358, "rewards_train/1-w": 3.4664688110351562, "rewards_train/2-2": 2.167879104614258, "rewards_train/2-w": 1.818196177482605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.275915503501892, "rewards_train/margins_1": 2.3986316919326782, "rewards_train/margins_2": 0.34968292713165283, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -79.1654052734375, "logps_train/policy_1_l": -65.41741943359375, "logps_train/policy_1_w": -86.50701904296875, "logps_train/policy_2_2": -64.40612030029297, "logps_train/policy_2_w": -100.96827697753906, "logps_train/ref_1_2": -95.5, "logps_train/ref_1_l": -53.5, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": 1.6350220441818237, "rewards_train/1-l": -1.1934999227523804, "rewards_train/1-w": 2.1015443801879883, "rewards_train/2-2": 2.322669506072998, "rewards_train/2-w": 1.6027820110321045, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.2950443029403687, "rewards_train/margins_1": 0.46652233600616455, "rewards_train/margins_2": 0.7198874950408936, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -101.18534851074219, "logps_train/policy_1_l": -63.780765533447266, "logps_train/policy_1_w": -69.08279418945312, "logps_train/policy_2_2": -82.30618286132812, "logps_train/policy_2_w": -98.05343627929688, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -51.75, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -104.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": 1.2990432977676392, "rewards_train/1-l": -1.2108889818191528, "rewards_train/1-w": 2.403439521789551, "rewards_train/2-2": 2.2109830379486084, "rewards_train/2-w": 1.2321562767028809, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6143285036087036, "rewards_train/margins_1": 1.1043962240219116, "rewards_train/margins_2": 0.9788267612457275, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -129.1187744140625, "logps_train/policy_1_l": -180.704345703125, "logps_train/policy_1_w": -129.65542602539062, "logps_train/policy_2_2": -98.197265625, "logps_train/policy_2_w": -173.60775756835938, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.6607784032821655, "rewards_train/1-l": -2.949340343475342, "rewards_train/1-w": 3.0633625984191895, "rewards_train/2-2": 2.6499996185302734, "rewards_train/2-w": 1.3759437799453735, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.012702941894531, "rewards_train/margins_1": 1.402584195137024, "rewards_train/margins_2": 1.2740558385849, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -152.81138610839844, "logps_train/policy_1_l": -187.51608276367188, "logps_train/policy_1_w": -138.2649688720703, "logps_train/policy_2_2": -100.37232208251953, "logps_train/policy_2_w": -210.27793884277344, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": 1.1422985792160034, "rewards_train/1-l": -2.415670871734619, "rewards_train/1-w": 3.4227218627929688, "rewards_train/2-2": 3.2690176963806152, "rewards_train/2-w": 0.4913460612297058, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.838392734527588, "rewards_train/margins_1": 2.2804232835769653, "rewards_train/margins_2": 2.7776716351509094, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -60.14554214477539, "logps_train/policy_1_l": -33.75136184692383, "logps_train/policy_1_w": -56.64992904663086, "logps_train/policy_2_2": -43.778594970703125, "logps_train/policy_2_w": -74.8584213256836, "logps_train/ref_1_2": -65.5, "logps_train/ref_1_l": -21.0, "logps_train/ref_1_w": -72.5, "logps_train/ref_2_2": -57.0, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": 0.5170865058898926, "rewards_train/1-l": -1.273182988166809, "rewards_train/1-w": 1.6107885837554932, "rewards_train/2-2": 1.3155486583709717, "rewards_train/2-w": 0.5805641412734985, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8839715719223022, "rewards_train/margins_1": 1.0937020778656006, "rewards_train/margins_2": 0.7349845170974731, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -159.66925048828125, "logps_train/policy_1_l": -248.2695770263672, "logps_train/policy_1_w": -107.39104461669922, "logps_train/policy_2_2": -122.9478988647461, "logps_train/policy_2_w": -157.3575439453125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 1.6971375942230225, "rewards_train/1-l": -3.9232468605041504, "rewards_train/1-w": 3.3214426040649414, "rewards_train/2-2": 2.678647994995117, "rewards_train/2-w": 0.592370331287384, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.244689464569092, "rewards_train/margins_1": 1.624305009841919, "rewards_train/margins_2": 2.086277663707733, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -165.52879333496094, "logps_train/policy_1_l": -142.0552978515625, "logps_train/policy_1_w": -129.75039672851562, "logps_train/policy_2_2": -127.91917419433594, "logps_train/policy_2_w": -173.35670471191406, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": 1.2971210479736328, "rewards_train/1-l": -1.6762325763702393, "rewards_train/1-w": 3.8194918632507324, "rewards_train/2-2": 3.2436294555664062, "rewards_train/2-w": 1.9596421718597412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.495724439620972, "rewards_train/margins_1": 2.5223708152770996, "rewards_train/margins_2": 1.283987283706665, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -144.34107971191406, "logps_train/policy_1_l": -185.68685913085938, "logps_train/policy_1_w": -111.4847640991211, "logps_train/policy_2_2": -111.29122161865234, "logps_train/policy_2_w": -142.52764892578125, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4315171241760254, "rewards_train/1-l": -1.9237638711929321, "rewards_train/1-w": 2.8663668632507324, "rewards_train/2-2": 2.730253219604492, "rewards_train/2-w": 1.726922869682312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.7901307344436646, "rewards_train/margins_1": 1.434849739074707, "rewards_train/margins_2": 1.0033303499221802, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -147.25900268554688, "logps_train/policy_1_l": -175.57398986816406, "logps_train/policy_1_w": -93.06414794921875, "logps_train/policy_2_2": -126.93643951416016, "logps_train/policy_2_w": -112.49188995361328, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": 2.267068386077881, "rewards_train/1-l": -3.3386497497558594, "rewards_train/1-w": 2.988506317138672, "rewards_train/2-2": 3.1040122509002686, "rewards_train/2-w": 2.3816704750061035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.327156066894531, "rewards_train/margins_1": 0.721437931060791, "rewards_train/margins_2": 0.722341775894165, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -124.27201080322266, "logps_train/policy_1_l": -122.88117980957031, "logps_train/policy_1_w": -116.15890502929688, "logps_train/policy_2_2": -95.52487182617188, "logps_train/policy_2_w": -165.88168334960938, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 1.443111538887024, "rewards_train/1-l": -1.1566731929779053, "rewards_train/1-w": 2.960671901702881, "rewards_train/2-2": 2.3975133895874023, "rewards_train/2-w": 0.9696450233459473, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.117345094680786, "rewards_train/margins_1": 1.517560362815857, "rewards_train/margins_2": 1.427868366241455, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -82.71714782714844, "logps_train/policy_1_l": -104.82076263427734, "logps_train/policy_1_w": -64.98146057128906, "logps_train/policy_2_2": -53.86731719970703, "logps_train/policy_2_w": -80.23768615722656, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.8937150835990906, "rewards_train/1-l": -1.2196738719940186, "rewards_train/1-w": 2.098728895187378, "rewards_train/2-2": 2.0214712619781494, "rewards_train/2-w": 1.2941995859146118, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.3184027671813965, "rewards_train/margins_1": 1.2050138115882874, "rewards_train/margins_2": 0.7272716760635376, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -154.232421875, "logps_train/policy_1_l": -177.8497314453125, "logps_train/policy_1_w": -112.20777893066406, "logps_train/policy_2_2": -120.63627624511719, "logps_train/policy_2_w": -151.53651428222656, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 1.121288776397705, "rewards_train/1-l": -2.2912235260009766, "rewards_train/1-w": 3.229221820831299, "rewards_train/2-2": 2.5965280532836914, "rewards_train/2-w": 1.5963480472564697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.520445346832275, "rewards_train/margins_1": 2.1079330444335938, "rewards_train/margins_2": 1.0001800060272217, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -234.23614501953125, "logps_train/policy_1_l": -224.81832885742188, "logps_train/policy_1_w": -114.89918518066406, "logps_train/policy_2_2": -191.69688415527344, "logps_train/policy_2_w": -154.93124389648438, "logps_train/ref_1_2": -252.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -228.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.7578318119049072, "rewards_train/1-l": -2.4955825805664062, "rewards_train/1-w": 3.8635196685791016, "rewards_train/2-2": 3.6767966747283936, "rewards_train/2-w": 2.1825501918792725, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.359102249145508, "rewards_train/margins_1": 2.1056878566741943, "rewards_train/margins_2": 1.494246482849121, "step": 661 }, { "epoch": 1.98, "learning_rate": 1.951726616070404e-09, "loss": 0.4528, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -106.81459045410156, "logps_train/policy_1_l": -131.37176513671875, "logps_train/policy_1_w": -156.8960723876953, "logps_train/policy_2_2": -79.34284210205078, "logps_train/policy_2_w": -215.58106994628906, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": 1.1134625673294067, "rewards_train/1-l": -1.1692085266113281, "rewards_train/1-w": 3.1885180473327637, "rewards_train/2-2": 2.1414976119995117, "rewards_train/2-w": 0.8528299331665039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.357726573944092, "rewards_train/margins_1": 2.075055480003357, "rewards_train/margins_2": 1.2886676788330078, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -124.54310607910156, "logps_train/policy_1_l": -111.37533569335938, "logps_train/policy_1_w": -70.10018157958984, "logps_train/policy_2_2": -86.33656311035156, "logps_train/policy_2_w": -98.41259002685547, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": 1.636704921722412, "rewards_train/1-l": -1.5403656959533691, "rewards_train/1-w": 2.768497943878174, "rewards_train/2-2": 2.760582447052002, "rewards_train/2-w": 1.6420419216156006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.308863639831543, "rewards_train/margins_1": 1.1317930221557617, "rewards_train/margins_2": 1.1185405254364014, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -175.638427734375, "logps_train/policy_1_l": -208.44125366210938, "logps_train/policy_1_w": -139.2827606201172, "logps_train/policy_2_2": -130.85902404785156, "logps_train/policy_2_w": -219.26626586914062, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 1.4717042446136475, "rewards_train/1-l": -1.7378733158111572, "rewards_train/1-w": 4.1217241287231445, "rewards_train/2-2": 3.2703473567962646, "rewards_train/2-w": 1.3014981746673584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.859597444534302, "rewards_train/margins_1": 2.650019884109497, "rewards_train/margins_2": 1.9688491821289062, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -156.8665771484375, "logps_train/policy_1_l": -159.47938537597656, "logps_train/policy_1_w": -129.60202026367188, "logps_train/policy_2_2": -113.32872009277344, "logps_train/policy_2_w": -183.01193237304688, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.5758423805236816, "rewards_train/1-l": -1.782045841217041, "rewards_train/1-w": 3.652297258377075, "rewards_train/2-2": 3.0280654430389404, "rewards_train/2-w": 1.4675582647323608, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.434343099594116, "rewards_train/margins_1": 2.0764548778533936, "rewards_train/margins_2": 1.5605071783065796, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -137.95529174804688, "logps_train/policy_1_l": -204.2051544189453, "logps_train/policy_1_w": -177.9857177734375, "logps_train/policy_2_2": -106.53492736816406, "logps_train/policy_2_w": -233.1824188232422, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 1.7779090404510498, "rewards_train/1-l": -2.3611392974853516, "rewards_train/1-w": 4.15924072265625, "rewards_train/2-2": 2.9902567863464355, "rewards_train/2-w": 2.089571475982666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.520380020141602, "rewards_train/margins_1": 2.3813316822052, "rewards_train/margins_2": 0.9006853103637695, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -187.4300994873047, "logps_train/policy_1_l": -178.05032348632812, "logps_train/policy_1_w": -99.1076431274414, "logps_train/policy_2_2": -136.30198669433594, "logps_train/policy_2_w": -139.90182495117188, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 1.3468339443206787, "rewards_train/1-l": -1.8753442764282227, "rewards_train/1-w": 3.2880635261535645, "rewards_train/2-2": 3.544802188873291, "rewards_train/2-w": 1.8535659313201904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.163407802581787, "rewards_train/margins_1": 1.9412295818328857, "rewards_train/margins_2": 1.6912362575531006, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -156.57794189453125, "logps_train/policy_1_l": -187.6654052734375, "logps_train/policy_1_w": -122.580322265625, "logps_train/policy_2_2": -120.34457397460938, "logps_train/policy_2_w": -156.22373962402344, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.336738109588623, "rewards_train/1-l": -2.2937395572662354, "rewards_train/1-w": 2.9474363327026367, "rewards_train/2-2": 2.885854721069336, "rewards_train/2-w": 1.9534072875976562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.241175889968872, "rewards_train/margins_1": 1.6106982231140137, "rewards_train/margins_2": 0.9324474334716797, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -174.38487243652344, "logps_train/policy_1_l": -214.30221557617188, "logps_train/policy_1_w": -165.26773071289062, "logps_train/policy_2_2": -134.30747985839844, "logps_train/policy_2_w": -210.8197021484375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 2.270887851715088, "rewards_train/1-l": -2.1911604404449463, "rewards_train/1-w": 4.013850212097168, "rewards_train/2-2": 3.7442519664764404, "rewards_train/2-w": 2.3492796421051025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.205010652542114, "rewards_train/margins_1": 1.74296236038208, "rewards_train/margins_2": 1.394972324371338, "step": 662 }, { "epoch": 1.99, "logps_train/policy_1_2": -126.67013549804688, "logps_train/policy_1_l": -134.54522705078125, "logps_train/policy_1_w": -119.06175994873047, "logps_train/policy_2_2": -104.09480285644531, "logps_train/policy_2_w": -155.52235412597656, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.5478308200836182, "rewards_train/1-l": -1.7427070140838623, "rewards_train/1-w": 2.9750747680664062, "rewards_train/2-2": 2.112393856048584, "rewards_train/2-w": 1.1665152311325073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.7177817821502686, "rewards_train/margins_1": 1.427243947982788, "rewards_train/margins_2": 0.9458786249160767, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -156.05235290527344, "logps_train/policy_1_l": -149.37905883789062, "logps_train/policy_1_w": -128.93234252929688, "logps_train/policy_2_2": -115.57940673828125, "logps_train/policy_2_w": -173.70367431640625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": 0.7992569804191589, "rewards_train/1-l": -1.2970850467681885, "rewards_train/1-w": 3.14817214012146, "rewards_train/2-2": 2.2955751419067383, "rewards_train/2-w": 0.9352967143058777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.445257186889648, "rewards_train/margins_1": 2.348915159702301, "rewards_train/margins_2": 1.3602784276008606, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -147.53607177734375, "logps_train/policy_1_l": -195.071044921875, "logps_train/policy_1_w": -113.53300476074219, "logps_train/policy_2_2": -110.40662384033203, "logps_train/policy_2_w": -148.40879821777344, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": 1.3882884979248047, "rewards_train/1-l": -2.5805420875549316, "rewards_train/1-w": 2.9685752391815186, "rewards_train/2-2": 2.7486443519592285, "rewards_train/2-w": 1.6184946298599243, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.54911732673645, "rewards_train/margins_1": 1.5802867412567139, "rewards_train/margins_2": 1.1301497220993042, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -204.68150329589844, "logps_train/policy_1_l": -257.63043212890625, "logps_train/policy_1_w": -207.1267547607422, "logps_train/policy_2_2": -177.07716369628906, "logps_train/policy_2_w": -249.44967651367188, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -248.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": 2.5076303482055664, "rewards_train/1-l": -2.677887439727783, "rewards_train/1-w": 4.075605392456055, "rewards_train/2-2": 3.636033296585083, "rewards_train/2-w": 2.316751480102539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.753492832183838, "rewards_train/margins_1": 1.5679750442504883, "rewards_train/margins_2": 1.319281816482544, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -229.10696411132812, "logps_train/policy_1_l": -242.53125, "logps_train/policy_1_w": -148.9404296875, "logps_train/policy_2_2": -181.7391815185547, "logps_train/policy_2_w": -201.1627960205078, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -214.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.9705522060394287, "rewards_train/1-l": -2.825000762939453, "rewards_train/1-w": 4.055957794189453, "rewards_train/2-2": 3.6596760749816895, "rewards_train/2-w": 1.5993454456329346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.880958557128906, "rewards_train/margins_1": 2.0854055881500244, "rewards_train/margins_2": 2.060330629348755, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -113.31159973144531, "logps_train/policy_1_l": -134.06126403808594, "logps_train/policy_1_w": -133.62954711914062, "logps_train/policy_2_2": -80.56472778320312, "logps_train/policy_2_w": -177.89085388183594, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.0680584907531738, "rewards_train/1-l": -2.034641981124878, "rewards_train/1-w": 3.158139705657959, "rewards_train/2-2": 2.2247776985168457, "rewards_train/2-w": 0.915601909160614, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.192781686782837, "rewards_train/margins_1": 2.090081214904785, "rewards_train/margins_2": 1.3091757893562317, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -140.59732055664062, "logps_train/policy_1_l": -155.21939086914062, "logps_train/policy_1_w": -201.62091064453125, "logps_train/policy_2_2": -110.22862243652344, "logps_train/policy_2_w": -249.48199462890625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -266.0, "rewards_train/1-2": 2.1293296813964844, "rewards_train/1-l": -1.8367846012115479, "rewards_train/1-w": 3.801970958709717, "rewards_train/2-2": 3.294325351715088, "rewards_train/2-w": 1.5408626794815063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.638755559921265, "rewards_train/margins_1": 1.6726412773132324, "rewards_train/margins_2": 1.7534626722335815, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -78.95431518554688, "logps_train/policy_1_l": -141.97593688964844, "logps_train/policy_1_w": -126.8226318359375, "logps_train/policy_2_2": -60.679054260253906, "logps_train/policy_2_w": -161.68234252929688, "logps_train/ref_1_2": -99.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 2.0123817920684814, "rewards_train/1-l": -1.6752183437347412, "rewards_train/1-w": 3.273987054824829, "rewards_train/2-2": 2.6305320262908936, "rewards_train/2-w": 1.3833293914794922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.94920539855957, "rewards_train/margins_1": 1.2616052627563477, "rewards_train/margins_2": 1.2472026348114014, "step": 663 }, { "epoch": 1.99, "learning_rate": 1.09790872801413e-09, "loss": 0.2972, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -100.73507690429688, "logps_train/policy_1_l": -161.4830322265625, "logps_train/policy_1_w": -159.74691772460938, "logps_train/policy_2_2": -82.16732025146484, "logps_train/policy_2_w": -202.0857696533203, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": 1.2278591394424438, "rewards_train/1-l": -3.138928174972534, "rewards_train/1-w": 3.1721839904785156, "rewards_train/2-2": 2.021256446838379, "rewards_train/2-w": 1.1101739406585693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.31111216545105, "rewards_train/margins_1": 1.9443248510360718, "rewards_train/margins_2": 0.9110825061798096, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -151.83419799804688, "logps_train/policy_1_l": -236.667236328125, "logps_train/policy_1_w": -151.59640502929688, "logps_train/policy_2_2": -110.83808898925781, "logps_train/policy_2_w": -206.14764404296875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 1.4665794372558594, "rewards_train/1-l": -2.555004358291626, "rewards_train/1-w": 3.490358829498291, "rewards_train/2-2": 2.884941577911377, "rewards_train/2-w": 1.1914851665496826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.045363187789917, "rewards_train/margins_1": 2.0237793922424316, "rewards_train/margins_2": 1.6934564113616943, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -142.4524688720703, "logps_train/policy_1_l": -71.92671203613281, "logps_train/policy_1_w": -58.090030670166016, "logps_train/policy_2_2": -111.4956283569336, "logps_train/policy_2_w": -89.06779479980469, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -58.0, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": 0.761002779006958, "rewards_train/1-l": -1.3776323795318604, "rewards_train/1-w": 2.166778564453125, "rewards_train/2-2": 2.1082496643066406, "rewards_train/2-w": 0.9713453650474548, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5444109439849854, "rewards_train/margins_1": 1.405775785446167, "rewards_train/margins_2": 1.1369042992591858, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -126.19255065917969, "logps_train/policy_1_l": -130.24456787109375, "logps_train/policy_1_w": -124.32417297363281, "logps_train/policy_2_2": -99.53924560546875, "logps_train/policy_2_w": -158.00051879882812, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 1.958088755607605, "rewards_train/1-l": -2.688518524169922, "rewards_train/1-w": 3.202739715576172, "rewards_train/2-2": 2.7109198570251465, "rewards_train/2-w": 2.047212600708008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.891258239746094, "rewards_train/margins_1": 1.244650959968567, "rewards_train/margins_2": 0.6637072563171387, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -144.30816650390625, "logps_train/policy_1_l": -116.58450317382812, "logps_train/policy_1_w": -95.67840576171875, "logps_train/policy_2_2": -114.53033447265625, "logps_train/policy_2_w": -115.41077423095703, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 1.3988703489303589, "rewards_train/1-l": -2.3581573963165283, "rewards_train/1-w": 2.736847400665283, "rewards_train/2-2": 3.1082942485809326, "rewards_train/2-w": 1.8237663507461548, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0950047969818115, "rewards_train/margins_1": 1.3379770517349243, "rewards_train/margins_2": 1.2845278978347778, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -190.02967834472656, "logps_train/policy_1_l": -216.64645385742188, "logps_train/policy_1_w": -163.09022521972656, "logps_train/policy_2_2": -140.14846801757812, "logps_train/policy_2_w": -215.89056396484375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": 1.6269147396087646, "rewards_train/1-l": -3.1419901847839355, "rewards_train/1-w": 3.310703992843628, "rewards_train/2-2": 3.532613515853882, "rewards_train/2-w": 0.8906319737434387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.4526941776275635, "rewards_train/margins_1": 1.6837892532348633, "rewards_train/margins_2": 2.641981542110443, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -117.12627410888672, "logps_train/policy_1_l": -110.37744140625, "logps_train/policy_1_w": -122.68377685546875, "logps_train/policy_2_2": -90.6644287109375, "logps_train/policy_2_w": -162.342041015625, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 1.2295600175857544, "rewards_train/1-l": -1.4955558776855469, "rewards_train/1-w": 3.428887367248535, "rewards_train/2-2": 2.233458995819092, "rewards_train/2-w": 1.4747812747955322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.924443244934082, "rewards_train/margins_1": 2.1993273496627808, "rewards_train/margins_2": 0.7586777210235596, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -121.61189270019531, "logps_train/policy_1_l": -118.47178649902344, "logps_train/policy_1_w": -76.57354736328125, "logps_train/policy_2_2": -86.49012756347656, "logps_train/policy_2_w": -114.58552551269531, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": 1.1630287170410156, "rewards_train/1-l": -1.127060890197754, "rewards_train/1-w": 2.625457286834717, "rewards_train/2-2": 2.389268636703491, "rewards_train/2-w": 1.4664474725723267, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7525181770324707, "rewards_train/margins_1": 1.4624285697937012, "rewards_train/margins_2": 0.9228211641311646, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -174.00770568847656, "logps_train/policy_1_l": -124.67950439453125, "logps_train/policy_1_w": -80.69033813476562, "logps_train/policy_2_2": -131.8106231689453, "logps_train/policy_2_w": -107.36856079101562, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -103.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": 1.8617289066314697, "rewards_train/1-l": -2.2013492584228516, "rewards_train/1-w": 2.8653409481048584, "rewards_train/2-2": 3.593938112258911, "rewards_train/2-w": 1.8748624324798584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.06669020652771, "rewards_train/margins_1": 1.0036120414733887, "rewards_train/margins_2": 1.7190756797790527, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -99.6203842163086, "logps_train/policy_1_l": -88.58009338378906, "logps_train/policy_1_w": -81.538818359375, "logps_train/policy_2_2": -85.82106018066406, "logps_train/policy_2_w": -108.00607299804688, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": 1.737180471420288, "rewards_train/1-l": -1.1449240446090698, "rewards_train/1-w": 2.6679933071136475, "rewards_train/2-2": 2.3999247550964355, "rewards_train/2-w": 1.8650166988372803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.8129173517227173, "rewards_train/margins_1": 0.9308128356933594, "rewards_train/margins_2": 0.5349080562591553, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -141.83584594726562, "logps_train/policy_1_l": -169.8369140625, "logps_train/policy_1_w": -141.0283203125, "logps_train/policy_2_2": -118.265380859375, "logps_train/policy_2_w": -177.9388885498047, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 1.764854073524475, "rewards_train/1-l": -2.18994140625, "rewards_train/1-w": 3.6284170150756836, "rewards_train/2-2": 2.90939998626709, "rewards_train/2-w": 2.0873613357543945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.818358421325684, "rewards_train/margins_1": 1.8635629415512085, "rewards_train/margins_2": 0.8220386505126953, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -189.3527374267578, "logps_train/policy_1_l": -207.56228637695312, "logps_train/policy_1_w": -152.71664428710938, "logps_train/policy_2_2": -166.07058715820312, "logps_train/policy_2_w": -185.3858184814453, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": 2.1319141387939453, "rewards_train/1-l": -2.253492832183838, "rewards_train/1-w": 3.5361480712890625, "rewards_train/2-2": 3.1452860832214355, "rewards_train/2-w": 2.394230842590332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.7896409034729, "rewards_train/margins_1": 1.4042339324951172, "rewards_train/margins_2": 0.7510552406311035, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -133.09283447265625, "logps_train/policy_1_l": -144.91477966308594, "logps_train/policy_1_w": -157.58860778808594, "logps_train/policy_2_2": -109.37006378173828, "logps_train/policy_2_w": -211.15733337402344, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": 2.1000921726226807, "rewards_train/1-l": -2.0571022033691406, "rewards_train/1-w": 4.111451148986816, "rewards_train/2-2": 3.0317440032958984, "rewards_train/2-w": 1.8842664957046509, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.168553352355957, "rewards_train/margins_1": 2.0113589763641357, "rewards_train/margins_2": 1.1474775075912476, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -133.62152099609375, "logps_train/policy_1_l": -149.95559692382812, "logps_train/policy_1_w": -136.02133178710938, "logps_train/policy_2_2": -107.92050170898438, "logps_train/policy_2_w": -169.95416259765625, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 2.0534729957580566, "rewards_train/1-l": -0.9952665567398071, "rewards_train/1-w": 3.4681787490844727, "rewards_train/2-2": 3.040762424468994, "rewards_train/2-w": 1.9123951196670532, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.46344530582428, "rewards_train/margins_1": 1.414705753326416, "rewards_train/margins_2": 1.128367304801941, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -137.8243865966797, "logps_train/policy_1_l": -151.11180114746094, "logps_train/policy_1_w": -131.92608642578125, "logps_train/policy_2_2": -98.55879211425781, "logps_train/policy_2_w": -164.48837280273438, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 1.6753733158111572, "rewards_train/1-l": -1.904149055480957, "rewards_train/1-w": 3.0534842014312744, "rewards_train/2-2": 3.0958781242370605, "rewards_train/2-w": 1.7099521160125732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9576332569122314, "rewards_train/margins_1": 1.3781108856201172, "rewards_train/margins_2": 1.3859260082244873, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -121.89456176757812, "logps_train/policy_1_l": -120.19110107421875, "logps_train/policy_1_w": -82.03388977050781, "logps_train/policy_2_2": -101.42208862304688, "logps_train/policy_2_w": -107.18719482421875, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": 1.445895791053772, "rewards_train/1-l": -1.4273128509521484, "rewards_train/1-w": 2.7329392433166504, "rewards_train/2-2": 2.249197483062744, "rewards_train/2-w": 1.5332329273223877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.160252094268799, "rewards_train/margins_1": 1.2870434522628784, "rewards_train/margins_2": 0.7159645557403564, "step": 665 }, { "epoch": 1.99, "learning_rate": 4.87979278772921e-10, "loss": 0.3768, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -97.59852600097656, "logps_train/policy_1_l": -87.07678985595703, "logps_train/policy_1_w": -110.94757080078125, "logps_train/policy_2_2": -76.15361785888672, "logps_train/policy_2_w": -139.38729858398438, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -97.5, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": 1.089463710784912, "rewards_train/1-l": -0.9834602475166321, "rewards_train/1-w": 2.4622745513916016, "rewards_train/2-2": 2.1473336219787598, "rewards_train/2-w": 1.1003329753875732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4457347989082336, "rewards_train/margins_1": 1.3728108406066895, "rewards_train/margins_2": 1.0470006465911865, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -150.15757751464844, "logps_train/policy_1_l": -145.90322875976562, "logps_train/policy_1_w": -113.66560363769531, "logps_train/policy_2_2": -98.13449096679688, "logps_train/policy_2_w": -159.9292449951172, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.170179009437561, "rewards_train/1-l": -2.326650381088257, "rewards_train/1-w": 2.9072678089141846, "rewards_train/2-2": 2.903738021850586, "rewards_train/2-w": 1.0445754528045654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.233918190002441, "rewards_train/margins_1": 1.7370887994766235, "rewards_train/margins_2": 1.8591625690460205, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -55.99994659423828, "logps_train/policy_1_l": -95.64454650878906, "logps_train/policy_1_w": -65.41438293457031, "logps_train/policy_2_2": -38.27887725830078, "logps_train/policy_2_w": -91.60479736328125, "logps_train/ref_1_2": -61.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -83.0, "logps_train/ref_2_2": -50.75, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": 0.4980522692203522, "rewards_train/1-l": -2.231837272644043, "rewards_train/1-w": 1.7720386981964111, "rewards_train/2-2": 1.2457451820373535, "rewards_train/2-w": 0.6008480787277222, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.003875970840454, "rewards_train/margins_1": 1.273986428976059, "rewards_train/margins_2": 0.6448971033096313, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -145.79754638671875, "logps_train/policy_1_l": -306.6213684082031, "logps_train/policy_1_w": -175.28834533691406, "logps_train/policy_2_2": -115.714111328125, "logps_train/policy_2_w": -223.89585876464844, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -270.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": 2.3421196937561035, "rewards_train/1-l": -3.627760887145996, "rewards_train/1-w": 3.8336658477783203, "rewards_train/2-2": 3.166870594024658, "rewards_train/2-w": 2.044787883758545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.461426734924316, "rewards_train/margins_1": 1.4915461540222168, "rewards_train/margins_2": 1.1220827102661133, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -173.64968872070312, "logps_train/policy_1_l": -209.75428771972656, "logps_train/policy_1_w": -107.72490692138672, "logps_train/policy_2_2": -136.27671813964844, "logps_train/policy_2_w": -138.11270141601562, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 1.8803431987762451, "rewards_train/1-l": -3.114881753921509, "rewards_train/1-w": 3.272822141647339, "rewards_train/2-2": 3.3535780906677246, "rewards_train/2-w": 2.0371673107147217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.387703895568848, "rewards_train/margins_1": 1.3924789428710938, "rewards_train/margins_2": 1.316410779953003, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -130.62667846679688, "logps_train/policy_1_l": -62.11139678955078, "logps_train/policy_1_w": -42.14430236816406, "logps_train/policy_2_2": -84.815185546875, "logps_train/policy_2_w": -70.5608139038086, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -49.0, "logps_train/ref_1_w": -62.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -80.0, "rewards_train/1-2": 0.6560820937156677, "rewards_train/1-l": -1.3075265884399414, "rewards_train/1-w": 1.9774644374847412, "rewards_train/2-2": 2.4372310638427734, "rewards_train/2-w": 0.9273173809051514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.2849910259246826, "rewards_train/margins_1": 1.3213823437690735, "rewards_train/margins_2": 1.509913682937622, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -101.7260513305664, "logps_train/policy_1_l": -161.6036376953125, "logps_train/policy_1_w": -131.89569091796875, "logps_train/policy_2_2": -77.8340835571289, "logps_train/policy_2_w": -173.47080993652344, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 1.5086445808410645, "rewards_train/1-l": -2.8730580806732178, "rewards_train/1-w": 3.122931957244873, "rewards_train/2-2": 2.441591739654541, "rewards_train/2-w": 1.3607310056686401, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.995990037918091, "rewards_train/margins_1": 1.6142873764038086, "rewards_train/margins_2": 1.0808607339859009, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -66.27010345458984, "logps_train/policy_1_l": -78.64877319335938, "logps_train/policy_1_w": -30.526927947998047, "logps_train/policy_2_2": -44.47706604003906, "logps_train/policy_2_w": -49.452781677246094, "logps_train/ref_1_2": -72.5, "logps_train/ref_1_l": -52.5, "logps_train/ref_1_w": -45.25, "logps_train/ref_2_2": -62.5, "logps_train/ref_2_w": -55.0, "rewards_train/1-2": 0.629239559173584, "rewards_train/1-l": -2.6301119327545166, "rewards_train/1-w": 1.475432276725769, "rewards_train/2-2": 1.8077621459960938, "rewards_train/2-w": 0.5773782730102539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.105544209480286, "rewards_train/margins_1": 0.8461927175521851, "rewards_train/margins_2": 1.2303838729858398, "step": 666 }, { "epoch": 2.0, "logps_train/policy_1_2": -157.523681640625, "logps_train/policy_1_l": -268.7231140136719, "logps_train/policy_1_w": -188.97886657714844, "logps_train/policy_2_2": -129.59609985351562, "logps_train/policy_2_w": -233.68136596679688, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -226.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": 2.1343512535095215, "rewards_train/1-l": -3.539498805999756, "rewards_train/1-w": 3.652113437652588, "rewards_train/2-2": 3.3013272285461426, "rewards_train/2-w": 1.703739047050476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.191612243652344, "rewards_train/margins_1": 1.5177621841430664, "rewards_train/margins_2": 1.5975881814956665, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -127.8748550415039, "logps_train/policy_1_l": -217.43124389648438, "logps_train/policy_1_w": -146.53224182128906, "logps_train/policy_2_2": -100.18185424804688, "logps_train/policy_2_w": -192.73593139648438, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": 1.9468894004821777, "rewards_train/1-l": -2.2431249618530273, "rewards_train/1-w": 3.2342753410339355, "rewards_train/2-2": 2.5396270751953125, "rewards_train/2-w": 1.2295328378677368, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.477400302886963, "rewards_train/margins_1": 1.2873859405517578, "rewards_train/margins_2": 1.3100942373275757, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -173.2433624267578, "logps_train/policy_1_l": -125.88380432128906, "logps_train/policy_1_w": -90.1701431274414, "logps_train/policy_2_2": -136.30380249023438, "logps_train/policy_2_w": -107.31356811523438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -123.5, "rewards_train/1-2": 1.4444146156311035, "rewards_train/1-l": -1.3809590339660645, "rewards_train/1-w": 2.3974387645721436, "rewards_train/2-2": 2.9954023361206055, "rewards_train/2-w": 1.6143461465835571, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.778397798538208, "rewards_train/margins_1": 0.95302414894104, "rewards_train/margins_2": 1.3810561895370483, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -170.4412841796875, "logps_train/policy_1_l": -215.654541015625, "logps_train/policy_1_w": -95.24433898925781, "logps_train/policy_2_2": -116.36429595947266, "logps_train/policy_2_w": -144.70822143554688, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 1.1738412380218506, "rewards_train/1-l": -2.7040538787841797, "rewards_train/1-w": 3.0302536487579346, "rewards_train/2-2": 3.020601749420166, "rewards_train/2-w": 1.6963661909103394, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 5.734307527542114, "rewards_train/margins_1": 1.856412410736084, "rewards_train/margins_2": 1.3242355585098267, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -132.5040283203125, "logps_train/policy_1_l": -122.87611389160156, "logps_train/policy_1_w": -98.16450500488281, "logps_train/policy_2_2": -103.78399658203125, "logps_train/policy_2_w": -133.8895263671875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 1.2870962619781494, "rewards_train/1-l": -1.008705496788025, "rewards_train/1-w": 2.6969776153564453, "rewards_train/2-2": 2.32472562789917, "rewards_train/2-w": 1.4624134302139282, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.70568311214447, "rewards_train/margins_1": 1.409881353378296, "rewards_train/margins_2": 0.8623121976852417, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -203.40969848632812, "logps_train/policy_1_l": -200.77601623535156, "logps_train/policy_1_w": -158.58863830566406, "logps_train/policy_2_2": -152.35772705078125, "logps_train/policy_2_w": -212.8062744140625, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": 1.5902793407440186, "rewards_train/1-l": -2.858851909637451, "rewards_train/1-w": 3.991135597229004, "rewards_train/2-2": 3.564228057861328, "rewards_train/2-w": 2.1068713665008545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.849987506866455, "rewards_train/margins_1": 2.4008562564849854, "rewards_train/margins_2": 1.4573566913604736, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -143.73480224609375, "logps_train/policy_1_l": -104.73831176757812, "logps_train/policy_1_w": -69.36207580566406, "logps_train/policy_2_2": -101.49769592285156, "logps_train/policy_2_w": -107.86116027832031, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -92.5, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": 1.119098424911499, "rewards_train/1-l": -1.3385767936706543, "rewards_train/1-w": 2.3150620460510254, "rewards_train/2-2": 2.5931997299194336, "rewards_train/2-w": 1.0273606777191162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6536388397216797, "rewards_train/margins_1": 1.1959636211395264, "rewards_train/margins_2": 1.5658390522003174, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -169.00906372070312, "logps_train/policy_1_l": -233.32058715820312, "logps_train/policy_1_w": -122.50949096679688, "logps_train/policy_2_2": -137.32821655273438, "logps_train/policy_2_w": -160.1192169189453, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 1.310031533241272, "rewards_train/1-l": -3.573073387145996, "rewards_train/1-w": 3.4662375450134277, "rewards_train/2-2": 2.597452163696289, "rewards_train/2-w": 1.9724540710449219, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 7.039310932159424, "rewards_train/margins_1": 2.1562060117721558, "rewards_train/margins_2": 0.6249980926513672, "step": 667 }, { "epoch": 2.0, "learning_rate": 1.2199779638566444e-10, "loss": 0.3916, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -211.479248046875, "logps_train/policy_1_l": -221.6224822998047, "logps_train/policy_1_w": -158.7564697265625, "logps_train/policy_2_2": -167.6048126220703, "logps_train/policy_2_w": -196.21975708007812, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 1.8583245277404785, "rewards_train/1-l": -2.574748992919922, "rewards_train/1-w": 3.3333380222320557, "rewards_train/2-2": 3.6082677841186523, "rewards_train/2-w": 2.065524101257324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.9080870151519775, "rewards_train/margins_1": 1.4750134944915771, "rewards_train/margins_2": 1.5427436828613281, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -136.92453002929688, "logps_train/policy_1_l": -168.116943359375, "logps_train/policy_1_w": -120.60005950927734, "logps_train/policy_2_2": -122.87567901611328, "logps_train/policy_2_w": -148.12948608398438, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 2.0153589248657227, "rewards_train/1-l": -1.8856216669082642, "rewards_train/1-w": 3.2095253467559814, "rewards_train/2-2": 2.3218069076538086, "rewards_train/2-w": 1.9964263439178467, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.095147013664246, "rewards_train/margins_1": 1.1941664218902588, "rewards_train/margins_2": 0.3253805637359619, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -155.97476196289062, "logps_train/policy_1_l": -155.09854125976562, "logps_train/policy_1_w": -112.3446273803711, "logps_train/policy_2_2": -126.60116577148438, "logps_train/policy_2_w": -154.15048217773438, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 1.7947109937667847, "rewards_train/1-l": -1.6228430271148682, "rewards_train/1-w": 3.115537405014038, "rewards_train/2-2": 2.793790102005005, "rewards_train/2-w": 1.442764401435852, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.738380432128906, "rewards_train/margins_1": 1.3208264112472534, "rewards_train/margins_2": 1.3510257005691528, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -95.1721420288086, "logps_train/policy_1_l": -107.36837768554688, "logps_train/policy_1_w": -68.567626953125, "logps_train/policy_2_2": -73.60595703125, "logps_train/policy_2_w": -98.33273315429688, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 1.1327855587005615, "rewards_train/1-l": -1.7204315662384033, "rewards_train/1-w": 2.3783934116363525, "rewards_train/2-2": 1.980029582977295, "rewards_train/2-w": 1.277274250984192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.098824977874756, "rewards_train/margins_1": 1.245607852935791, "rewards_train/margins_2": 0.702755331993103, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -99.74871826171875, "logps_train/policy_1_l": -130.94920349121094, "logps_train/policy_1_w": -120.11058044433594, "logps_train/policy_2_2": -76.33946228027344, "logps_train/policy_2_w": -153.24978637695312, "logps_train/ref_1_2": -116.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.6680972576141357, "rewards_train/1-l": -1.394139051437378, "rewards_train/1-w": 2.967066764831543, "rewards_train/2-2": 2.523867130279541, "rewards_train/2-w": 1.6289279460906982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.361205816268921, "rewards_train/margins_1": 1.2989695072174072, "rewards_train/margins_2": 0.8949391841888428, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -163.36691284179688, "logps_train/policy_1_l": -213.38418579101562, "logps_train/policy_1_w": -118.93778991699219, "logps_train/policy_2_2": -134.5912322998047, "logps_train/policy_2_w": -150.7605743408203, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 1.8992464542388916, "rewards_train/1-l": -2.962442398071289, "rewards_train/1-w": 3.378096580505371, "rewards_train/2-2": 3.215877056121826, "rewards_train/2-w": 1.9258959293365479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.34053897857666, "rewards_train/margins_1": 1.4788501262664795, "rewards_train/margins_2": 1.2899811267852783, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -121.07365417480469, "logps_train/policy_1_l": -166.0596923828125, "logps_train/policy_1_w": -148.5375213623047, "logps_train/policy_2_2": -94.42694091796875, "logps_train/policy_2_w": -185.69293212890625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": 1.861384630203247, "rewards_train/1-l": -2.1479616165161133, "rewards_train/1-w": 3.6657793521881104, "rewards_train/2-2": 2.676055908203125, "rewards_train/2-w": 1.676020622253418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.813740968704224, "rewards_train/margins_1": 1.8043947219848633, "rewards_train/margins_2": 1.000035285949707, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -102.10275268554688, "logps_train/policy_1_l": -143.22544860839844, "logps_train/policy_1_w": -94.65750122070312, "logps_train/policy_2_2": -69.85142517089844, "logps_train/policy_2_w": -136.33323669433594, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 1.1756623983383179, "rewards_train/1-l": -1.9762065410614014, "rewards_train/1-w": 2.5006561279296875, "rewards_train/2-2": 2.2445454597473145, "rewards_train/2-w": 0.9229263663291931, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.476862668991089, "rewards_train/margins_1": 1.3249937295913696, "rewards_train/margins_2": 1.3216190934181213, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -100.01888275146484, "logps_train/policy_1_l": -140.12969970703125, "logps_train/policy_1_w": -127.12689208984375, "logps_train/policy_2_2": -85.60250091552734, "logps_train/policy_2_w": -162.66273498535156, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 1.813346266746521, "rewards_train/1-l": -1.4899232387542725, "rewards_train/1-w": 3.169342517852783, "rewards_train/2-2": 2.452054500579834, "rewards_train/2-w": 1.3579447269439697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.659265756607056, "rewards_train/margins_1": 1.3559962511062622, "rewards_train/margins_2": 1.0941097736358643, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -183.37615966796875, "logps_train/policy_1_l": -239.7128448486328, "logps_train/policy_1_w": -121.20724487304688, "logps_train/policy_2_2": -143.40980529785156, "logps_train/policy_2_w": -169.82818603515625, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 1.7287898063659668, "rewards_train/1-l": -3.389155626296997, "rewards_train/1-w": 3.5954864025115967, "rewards_train/2-2": 3.4666361808776855, "rewards_train/2-w": 1.8332955837249756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.984642028808594, "rewards_train/margins_1": 1.8666965961456299, "rewards_train/margins_2": 1.63334059715271, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -119.97138977050781, "logps_train/policy_1_l": -114.96337127685547, "logps_train/policy_1_w": -81.19609069824219, "logps_train/policy_2_2": -90.63589477539062, "logps_train/policy_2_w": -116.0743179321289, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": 1.3925089836120605, "rewards_train/1-l": -2.08754825592041, "rewards_train/1-w": 3.181952953338623, "rewards_train/2-2": 2.3334808349609375, "rewards_train/2-w": 1.616005539894104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 5.269501209259033, "rewards_train/margins_1": 1.7894439697265625, "rewards_train/margins_2": 0.7174752950668335, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -146.46905517578125, "logps_train/policy_1_l": -172.95880126953125, "logps_train/policy_1_w": -75.0103759765625, "logps_train/policy_2_2": -111.8692855834961, "logps_train/policy_2_w": -100.74629211425781, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": 0.8327820301055908, "rewards_train/1-l": -2.55711030960083, "rewards_train/1-w": 2.0997438430786133, "rewards_train/2-2": 2.35369610786438, "rewards_train/2-w": 1.0284961462020874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.656854152679443, "rewards_train/margins_1": 1.2669618129730225, "rewards_train/margins_2": 1.3251999616622925, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -178.41766357421875, "logps_train/policy_1_l": -143.63092041015625, "logps_train/policy_1_w": -109.64063262939453, "logps_train/policy_2_2": -152.6021270751953, "logps_train/policy_2_w": -133.2471466064453, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 2.4082343578338623, "rewards_train/1-l": -1.5482490062713623, "rewards_train/1-w": 3.639843225479126, "rewards_train/2-2": 3.6058034896850586, "rewards_train/2-w": 2.973721981048584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.188092231750488, "rewards_train/margins_1": 1.2316088676452637, "rewards_train/margins_2": 0.6320815086364746, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -221.54978942871094, "logps_train/policy_1_l": -238.80905151367188, "logps_train/policy_1_w": -101.92715454101562, "logps_train/policy_2_2": -168.82424926757812, "logps_train/policy_2_w": -135.18255615234375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 1.4465843439102173, "rewards_train/1-l": -3.4367659091949463, "rewards_train/1-w": 3.506502866744995, "rewards_train/2-2": 3.749606132507324, "rewards_train/2-w": 2.440338611602783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.943268775939941, "rewards_train/margins_1": 2.059918522834778, "rewards_train/margins_2": 1.309267520904541, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -120.92692565917969, "logps_train/policy_1_l": -161.86184692382812, "logps_train/policy_1_w": -158.94189453125, "logps_train/policy_2_2": -91.07865905761719, "logps_train/policy_2_w": -206.83377075195312, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 1.7557449340820312, "rewards_train/1-l": -1.9043490886688232, "rewards_train/1-w": 3.4636220932006836, "rewards_train/2-2": 3.079829692840576, "rewards_train/2-w": 1.580686330795288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.367971181869507, "rewards_train/margins_1": 1.7078771591186523, "rewards_train/margins_2": 1.499143362045288, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -137.2230682373047, "logps_train/policy_1_l": -152.58956909179688, "logps_train/policy_1_w": -116.72792053222656, "logps_train/policy_2_2": -115.42002868652344, "logps_train/policy_2_w": -139.12033081054688, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": 2.0382397174835205, "rewards_train/1-l": -1.8882548809051514, "rewards_train/1-w": 2.4963490962982178, "rewards_train/2-2": 2.8056535720825195, "rewards_train/2-w": 1.6641387939453125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.384603977203369, "rewards_train/margins_1": 0.45810937881469727, "rewards_train/margins_2": 1.141514778137207, "step": 669 }, { "epoch": 2.01, "learning_rate": 0.0, "loss": 0.3963, "step": 670 } ], "logging_steps": 2, "max_steps": 670, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 335, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }